Dua Rajper committed on
Commit
7a19be8
·
verified ·
1 Parent(s): 39e042c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -23
app.py CHANGED
@@ -4,7 +4,7 @@ from dotenv import load_dotenv
4
  import streamlit as st
5
  from PyPDF2 import PdfReader
6
  from langchain.text_splitter import CharacterTextSplitter
7
- from langchain_community.embeddings import HuggingFaceInstructEmbeddings
8
  from langchain.vectorstores import FAISS
9
  from langchain.memory import ConversationBufferMemory
10
  from langchain.chains import ConversationalRetrievalChain
@@ -14,10 +14,7 @@ from langchain_groq import ChatGroq
14
  load_dotenv()
15
 
16
  # Set up logging
17
- logging.basicConfig(
18
- level=logging.INFO,
19
- format='%(asctime)s - %(levelname)s - %(message)s'
20
- )
21
 
22
  # Function to extract text from PDF files
23
  def get_pdf_text(pdf_docs):
@@ -25,31 +22,25 @@ def get_pdf_text(pdf_docs):
25
  for pdf in pdf_docs:
26
  pdf_reader = PdfReader(pdf)
27
  for page in pdf_reader.pages:
28
- text += page.extract_text()
29
  return text
30
 
31
- # Function to split the extracted text into chunks
32
  def get_text_chunks(text):
33
- text_splitter = CharacterTextSplitter(
34
- separator="\n",
35
- chunk_size=1000,
36
- chunk_overlap=200,
37
- length_function=len
38
- )
39
  chunks = text_splitter.split_text(text)
40
  return chunks
41
 
42
- # Function to create a FAISS vectorstore
43
  def get_vectorstore(text_chunks):
44
- embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
45
  vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
46
  return vectorstore
47
 
48
  # Function to set up the conversational retrieval chain
49
  def get_conversation_chain(vectorstore):
50
  try:
51
- groq_api_key = os.getenv("GROQ_API_KEY")
52
- llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0.5, api_key=groq_api_key)
53
  memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
54
 
55
  conversation_chain = ConversationalRetrievalChain.from_llm(
@@ -81,23 +72,21 @@ def handle_userinput(user_question):
81
  # Main function to run the Streamlit app
82
  def main():
83
  load_dotenv()
84
- st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
85
 
86
  if "conversation" not in st.session_state:
87
  st.session_state.conversation = None
88
  if "chat_history" not in st.session_state:
89
  st.session_state.chat_history = None
90
 
91
- st.header("Chat with multiple PDFs :books:")
92
  user_question = st.text_input("Ask a question about your documents:")
93
  if user_question:
94
  handle_userinput(user_question)
95
 
96
  with st.sidebar:
97
- st.subheader("Your documents")
98
- pdf_docs = st.file_uploader(
99
- "Upload your PDFs here and click on 'Process'", accept_multiple_files=True
100
- )
101
  if st.button("Process"):
102
  with st.spinner("Processing..."):
103
  raw_text = get_pdf_text(pdf_docs)
 
4
  import streamlit as st
5
  from PyPDF2 import PdfReader
6
  from langchain.text_splitter import CharacterTextSplitter
7
+ from langchain.embeddings import HuggingFaceEmbeddings
8
  from langchain.vectorstores import FAISS
9
  from langchain.memory import ConversationBufferMemory
10
  from langchain.chains import ConversationalRetrievalChain
 
14
  load_dotenv()
15
 
16
  # Set up logging
17
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 
 
 
18
 
19
  # Function to extract text from PDF files
20
  def get_pdf_text(pdf_docs):
 
22
  for pdf in pdf_docs:
23
  pdf_reader = PdfReader(pdf)
24
  for page in pdf_reader.pages:
25
+ text += page.extract_text() or ""
26
  return text
27
 
28
+ # Function to split extracted text into chunks
29
  def get_text_chunks(text):
30
+ text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len)
 
 
 
 
 
31
  chunks = text_splitter.split_text(text)
32
  return chunks
33
 
34
+ # Function to create a FAISS vectorstore using Hugging Face Embeddings
35
  def get_vectorstore(text_chunks):
36
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
37
  vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
38
  return vectorstore
39
 
40
  # Function to set up the conversational retrieval chain
41
  def get_conversation_chain(vectorstore):
42
  try:
43
+ llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0.5)
 
44
  memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
45
 
46
  conversation_chain = ConversationalRetrievalChain.from_llm(
 
72
  # Main function to run the Streamlit app
73
  def main():
74
  load_dotenv()
75
+ st.set_page_config(page_title="Chat with PDFs", page_icon="📄")
76
 
77
  if "conversation" not in st.session_state:
78
  st.session_state.conversation = None
79
  if "chat_history" not in st.session_state:
80
  st.session_state.chat_history = None
81
 
82
+ st.header("Chat with your PDFs 📄🤖")
83
  user_question = st.text_input("Ask a question about your documents:")
84
  if user_question:
85
  handle_userinput(user_question)
86
 
87
  with st.sidebar:
88
+ st.subheader("Upload your PDFs")
89
+ pdf_docs = st.file_uploader("Upload PDFs and click 'Process'", accept_multiple_files=True)
 
 
90
  if st.button("Process"):
91
  with st.spinner("Processing..."):
92
  raw_text = get_pdf_text(pdf_docs)