msaifee committed on
Commit
9dfbe9c
·
verified ·
1 Parent(s): 71edb95

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -35
app.py CHANGED
@@ -2,31 +2,29 @@ import os
2
  import tempfile
3
  import streamlit as st
4
  from langchain.text_splitter import RecursiveCharacterTextSplitter
 
5
  from langchain.vectorstores import FAISS
6
- from langchain.embeddings import HuggingFaceEmbeddings
7
  from langchain.chains import RetrievalQA
8
- from io import BytesIO
9
  from langchain.document_loaders import PyPDFLoader
10
- from transformers import pipeline
11
  from langchain.schema import Document
12
  from dotenv import load_dotenv
13
 
14
- # Load environment variables from Hugging Face Secrets
15
  load_dotenv()
16
 
17
  os.environ['OPENAI_API_KEY'] = os.getenv("OPENAI_API_KEY")
18
- os.environ['HUGGINGFACE_API_KEY'] = os.getenv("HF_TOKEN")
19
  os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
20
  os.environ["LANGCHAIN_TRACING_V2"] = "true"
21
  os.environ["LANGCHAIN_PROJECT"]="Research-Paper-Summarizer"
22
 
23
  # Streamlit Page Config
24
  st.set_page_config(
25
- page_title="Research Paper Summarizer with DeepSeekR1",
26
  layout="centered"
27
  )
28
 
29
- st.title("📚 Research Paper Summarizer using DeepSeekR1")
30
 
31
  # File Uploader
32
  uploaded_files = st.file_uploader(
@@ -35,21 +33,11 @@ uploaded_files = st.file_uploader(
35
  accept_multiple_files=True
36
  )
37
 
38
- # A placeholder to store vector database (FAISS)
39
  if "vector_store" not in st.session_state:
40
  st.session_state.vector_store = None
41
 
42
- # Hugging Face LLM Model Pipeline
43
- def get_huggingface_pipeline():
44
- st.info("Loading Hugging Face DeepSeekR1 Model... Please wait.")
45
- return pipeline(
46
- "text-generation",
47
- model="deepseek-ai/DeepSeek-R1",
48
- use_auth_token=os.environ['HUGGINGFACE_API_KEY'],
49
- trust_remote_code=True
50
- )
51
-
52
- # Process the PDFs, Create/Update the Vector Store
53
  if st.button("Process PDFs") and uploaded_files:
54
  all_documents = []
55
 
@@ -76,15 +64,14 @@ if st.button("Process PDFs") and uploaded_files:
76
  # Create Document object for each chunk
77
  all_documents.append(Document(page_content=chunk, metadata=doc.metadata))
78
 
79
-
80
- # Create embeddings with Hugging Face
81
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
82
  st.session_state.vector_store = FAISS.from_documents(
83
  documents=all_documents,
84
  embedding=embeddings
85
  )
86
 
87
- st.success("PDFs processed and vector store created!")
88
 
89
  # Query + Summarize
90
  query = st.text_input("Enter your question or summary request:")
@@ -93,26 +80,29 @@ if st.button("Get Summary/Answer"):
93
  if st.session_state.vector_store is None:
94
  st.warning("Please upload and process PDFs first.")
95
  else:
 
96
  retriever = st.session_state.vector_store.as_retriever(
97
  search_type="similarity",
98
  search_kwargs={"k": 5}
99
  )
 
 
 
 
 
 
 
100
 
101
- # Use Hugging Face LLM
102
- hf_pipeline = get_huggingface_pipeline()
103
-
104
- # Retrieve documents and generate response
105
- relevant_docs = retriever.get_relevant_documents(query)
106
- context_text = "\n".join([doc.page_content for doc in relevant_docs])
107
-
108
- # Generate answer using Hugging Face model
109
- response = hf_pipeline(f"Context: {context_text}\nQuestion: {query}", max_length=500, num_return_sequences=1)
110
 
 
111
  st.markdown("### Answer:")
112
- st.write(response[0]['generated_text'])
113
 
114
  with st.expander("Show source documents"):
115
- for i, doc in enumerate(relevant_docs):
116
- st.markdown(f"**Source Document {i + 1}:**")
 
117
  st.write(doc.page_content)
118
  st.write("---")
 
2
  import tempfile
3
  import streamlit as st
4
  from langchain.text_splitter import RecursiveCharacterTextSplitter
5
+ from langchain.embeddings.openai import OpenAIEmbeddings
6
  from langchain.vectorstores import FAISS
7
+ from langchain.llms import OpenAI
8
  from langchain.chains import RetrievalQA
 
9
  from langchain.document_loaders import PyPDFLoader
 
10
  from langchain.schema import Document
11
  from dotenv import load_dotenv
12
 
13
+ # Load environment variables
14
  load_dotenv()
15
 
16
  os.environ['OPENAI_API_KEY'] = os.getenv("OPENAI_API_KEY")
 
17
  os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
18
  os.environ["LANGCHAIN_TRACING_V2"] = "true"
19
  os.environ["LANGCHAIN_PROJECT"]="Research-Paper-Summarizer"
20
 
21
  # Streamlit Page Config
22
  st.set_page_config(
23
+ page_title="Research Paper Summarizer",
24
  layout="centered"
25
  )
26
 
27
+ st.title("📚 Research Paper Summarizer")
28
 
29
  # File Uploader
30
  uploaded_files = st.file_uploader(
 
33
  accept_multiple_files=True
34
  )
35
 
36
+ # Initialize vector store in session state
37
  if "vector_store" not in st.session_state:
38
  st.session_state.vector_store = None
39
 
40
+ # Process PDFs and create/update the vector store
 
 
 
 
 
 
 
 
 
 
41
  if st.button("Process PDFs") and uploaded_files:
42
  all_documents = []
43
 
 
64
  # Create Document object for each chunk
65
  all_documents.append(Document(page_content=chunk, metadata=doc.metadata))
66
 
67
+ # Create vector store from documents
68
+ embeddings = OpenAIEmbeddings()
 
69
  st.session_state.vector_store = FAISS.from_documents(
70
  documents=all_documents,
71
  embedding=embeddings
72
  )
73
 
74
+ st.success("PDFs processed and vector store created!")
75
 
76
  # Query + Summarize
77
  query = st.text_input("Enter your question or summary request:")
 
80
  if st.session_state.vector_store is None:
81
  st.warning("Please upload and process PDFs first.")
82
  else:
83
+ # Create retriever and chain
84
  retriever = st.session_state.vector_store.as_retriever(
85
  search_type="similarity",
86
  search_kwargs={"k": 5}
87
  )
88
+ llm = OpenAI(temperature=0.0)
89
+ qa_chain = RetrievalQA.from_chain_type(
90
+ llm=llm,
91
+ chain_type="stuff",
92
+ retriever=retriever,
93
+ return_source_documents=True
94
+ )
95
 
96
+ # Execute query
97
+ result = qa_chain({"query": query})
 
 
 
 
 
 
 
98
 
99
+ # Display the result
100
  st.markdown("### Answer:")
101
+ st.write(result["result"])
102
 
103
  with st.expander("Show source documents"):
104
+ source_docs = result["source_documents"]
105
+ for i, doc in enumerate(source_docs):
106
+ st.markdown(f"**Source Document {i+1}:**")
107
  st.write(doc.page_content)
108
  st.write("---")