Azmathussainthebo committed on
Commit
a29a1ab
·
verified ·
1 Parent(s): 08b9a7c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -15
app.py CHANGED
@@ -1,16 +1,23 @@
1
  import os
 
 
2
  import streamlit as st
3
  from PyPDF2 import PdfReader
4
  from langchain.text_splitter import CharacterTextSplitter
5
- from langchain_cohere import CohereEmbeddings
6
  from langchain.vectorstores import FAISS
7
  from langchain.memory import ConversationBufferMemory
8
  from langchain.chains import ConversationalRetrievalChain
9
- from langchain_openai import ChatOpenAI
10
 
11
  # Load environment variables
12
- cohere_api_key = os.environ.get("COHERE_API_KEY")
13
- openai_api_key = os.environ.get("OPENAI_API_KEY")
 
 
 
 
 
14
 
15
  # Function to extract text from PDF files
16
  def get_pdf_text(pdf_docs):
@@ -32,28 +39,36 @@ def get_text_chunks(text):
32
  chunks = text_splitter.split_text(text)
33
  return chunks
34
 
35
- # Function to create a FAISS vectorstore
36
  def get_vectorstore(text_chunks):
37
- embeddings = CohereEmbeddings(model="embed-english-v3.0", cohere_api_key=cohere_api_key)
38
  vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
39
  return vectorstore
40
 
41
  # Function to set up the conversational retrieval chain
42
  def get_conversation_chain(vectorstore):
43
- llm = ChatOpenAI(model="gpt-4", temperature=0.5, openai_api_key=openai_api_key)
44
- memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
45
- conversation_chain = ConversationalRetrievalChain.from_llm(
46
- llm=llm,
47
- retriever=vectorstore.as_retriever(),
48
- memory=memory
49
- )
50
- return conversation_chain
 
 
 
 
 
 
 
51
 
52
  # Handle user input
53
  def handle_userinput(user_question):
54
  if st.session_state.conversation is not None:
55
  response = st.session_state.conversation({'question': user_question})
56
  st.session_state.chat_history = response['chat_history']
 
57
  for i, message in enumerate(st.session_state.chat_history):
58
  if i % 2 == 0:
59
  st.write(f"*User:* {message.content}")
@@ -64,7 +79,9 @@ def handle_userinput(user_question):
64
 
65
  # Main function to run the Streamlit app
66
  def main():
 
67
  st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
 
68
  if "conversation" not in st.session_state:
69
  st.session_state.conversation = None
70
  if "chat_history" not in st.session_state:
@@ -77,7 +94,9 @@ def main():
77
 
78
  with st.sidebar:
79
  st.subheader("Your documents")
80
- pdf_docs = st.file_uploader("Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
 
 
81
  if st.button("Process"):
82
  with st.spinner("Processing..."):
83
  raw_text = get_pdf_text(pdf_docs)
 
1
  import os
2
+ import logging
3
+ from dotenv import load_dotenv
4
  import streamlit as st
5
  from PyPDF2 import PdfReader
6
  from langchain.text_splitter import CharacterTextSplitter
7
+ from langchain.embeddings import HuggingFaceInstructEmbeddings
8
  from langchain.vectorstores import FAISS
9
  from langchain.memory import ConversationBufferMemory
10
  from langchain.chains import ConversationalRetrievalChain
11
+ from langchain_groq import ChatGroq
12
 
13
  # Load environment variables
14
+ load_dotenv()
15
+
16
+ # Set up logging
17
+ logging.basicConfig(
18
+ level=logging.INFO,
19
+ format='%(asctime)s - %(levelname)s - %(message)s'
20
+ )
21
 
22
  # Function to extract text from PDF files
23
  def get_pdf_text(pdf_docs):
 
39
  chunks = text_splitter.split_text(text)
40
  return chunks
41
 
42
+ # Function to create a FAISS vectorstore using Hugging Face embeddings
43
  def get_vectorstore(text_chunks):
44
+ embeddings = HuggingFaceInstructEmbeddings(model_name="all-MiniLM-L6-v2")
45
  vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
46
  return vectorstore
47
 
48
  # Function to set up the conversational retrieval chain
49
  def get_conversation_chain(vectorstore):
50
+ try:
51
+ llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0.5)
52
+ memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
53
+
54
+ conversation_chain = ConversationalRetrievalChain.from_llm(
55
+ llm=llm,
56
+ retriever=vectorstore.as_retriever(),
57
+ memory=memory
58
+ )
59
+
60
+ logging.info("Conversation chain created successfully.")
61
+ return conversation_chain
62
+ except Exception as e:
63
+ logging.error(f"Error creating conversation chain: {e}")
64
+ st.error("An error occurred while setting up the conversation chain.")
65
 
66
  # Handle user input
67
  def handle_userinput(user_question):
68
  if st.session_state.conversation is not None:
69
  response = st.session_state.conversation({'question': user_question})
70
  st.session_state.chat_history = response['chat_history']
71
+
72
  for i, message in enumerate(st.session_state.chat_history):
73
  if i % 2 == 0:
74
  st.write(f"*User:* {message.content}")
 
79
 
80
  # Main function to run the Streamlit app
81
  def main():
82
+ load_dotenv()
83
  st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
84
+
85
  if "conversation" not in st.session_state:
86
  st.session_state.conversation = None
87
  if "chat_history" not in st.session_state:
 
94
 
95
  with st.sidebar:
96
  st.subheader("Your documents")
97
+ pdf_docs = st.file_uploader(
98
+ "Upload your PDFs here and click on 'Process'", accept_multiple_files=True
99
+ )
100
  if st.button("Process"):
101
  with st.spinner("Processing..."):
102
  raw_text = get_pdf_text(pdf_docs)