Azmathussainthebo committed on
Commit
506eddb
·
verified ·
1 Parent(s): f1ac50a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -40
app.py CHANGED
@@ -1,25 +1,16 @@
1
  import os
2
- import logging
3
- from dotenv import load_dotenv
4
  import streamlit as st
5
  from PyPDF2 import PdfReader
6
  from langchain.text_splitter import CharacterTextSplitter
7
- # from langchain.embeddings import HuggingFaceInstructEmbeddings
8
  from langchain_cohere import CohereEmbeddings
9
  from langchain.vectorstores import FAISS
10
  from langchain.memory import ConversationBufferMemory
11
  from langchain.chains import ConversationalRetrievalChain
12
- # from langchain.llms import Ollama
13
- from langchain_groq import ChatGroq
14
 
15
  # Load environment variables
16
- load_dotenv()
17
-
18
- # Set up logging
19
- logging.basicConfig(
20
- level=logging.INFO,
21
- format='%(asctime)s - %(levelname)s - %(message)s'
22
- )
23
 
24
  # Function to extract text from PDF files
25
  def get_pdf_text(pdf_docs):
@@ -42,42 +33,27 @@ def get_text_chunks(text):
42
  return chunks
43
 
44
  # Function to create a FAISS vectorstore
45
- # def get_vectorstore(text_chunks):
46
- # embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
47
- # vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
48
- # return vectorstore
49
-
50
  def get_vectorstore(text_chunks):
51
- cohere_api_key = os.getenv("COHERE_API_KEY")
52
  embeddings = CohereEmbeddings(model="embed-english-v3.0", cohere_api_key=cohere_api_key)
53
  vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
54
  return vectorstore
55
 
56
  # Function to set up the conversational retrieval chain
57
  def get_conversation_chain(vectorstore):
58
- try:
59
- # llm = Ollama(model="llama3.2:1b")
60
- llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0.5)
61
- memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
62
-
63
- conversation_chain = ConversationalRetrievalChain.from_llm(
64
- llm=llm,
65
- retriever=vectorstore.as_retriever(),
66
- memory=memory
67
- )
68
-
69
- logging.info("Conversation chain created successfully.")
70
- return conversation_chain
71
- except Exception as e:
72
- logging.error(f"Error creating conversation chain: {e}")
73
- st.error("An error occurred while setting up the conversation chain.")
74
 
75
  # Handle user input
76
  def handle_userinput(user_question):
77
  if st.session_state.conversation is not None:
78
  response = st.session_state.conversation({'question': user_question})
79
  st.session_state.chat_history = response['chat_history']
80
-
81
  for i, message in enumerate(st.session_state.chat_history):
82
  if i % 2 == 0:
83
  st.write(f"*User:* {message.content}")
@@ -88,9 +64,7 @@ def handle_userinput(user_question):
88
 
89
  # Main function to run the Streamlit app
90
  def main():
91
- load_dotenv()
92
  st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
93
-
94
  if "conversation" not in st.session_state:
95
  st.session_state.conversation = None
96
  if "chat_history" not in st.session_state:
@@ -103,9 +77,7 @@ def main():
103
 
104
  with st.sidebar:
105
  st.subheader("Your documents")
106
- pdf_docs = st.file_uploader(
107
- "Upload your PDFs here and click on 'Process'", accept_multiple_files=True
108
- )
109
  if st.button("Process"):
110
  with st.spinner("Processing..."):
111
  raw_text = get_pdf_text(pdf_docs)
 
1
  import os
 
 
2
  import streamlit as st
3
  from PyPDF2 import PdfReader
4
  from langchain.text_splitter import CharacterTextSplitter
 
5
  from langchain_cohere import CohereEmbeddings
6
  from langchain.vectorstores import FAISS
7
  from langchain.memory import ConversationBufferMemory
8
  from langchain.chains import ConversationalRetrievalChain
9
+ from langchain_openai import ChatOpenAI
 
10
 
11
  # Load environment variables
12
+ cohere_api_key = os.environ.get("COHERE_API_KEY")
13
+ openai_api_key = os.environ.get("OPENAI_API_KEY")
 
 
 
 
 
14
 
15
  # Function to extract text from PDF files
16
  def get_pdf_text(pdf_docs):
 
33
  return chunks
34
 
35
  # Function to create a FAISS vectorstore
 
 
 
 
 
36
  def get_vectorstore(text_chunks):
 
37
  embeddings = CohereEmbeddings(model="embed-english-v3.0", cohere_api_key=cohere_api_key)
38
  vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
39
  return vectorstore
40
 
41
  # Function to set up the conversational retrieval chain
42
  def get_conversation_chain(vectorstore):
43
+ llm = ChatOpenAI(model="gpt-4", temperature=0.5, openai_api_key=openai_api_key)
44
+ memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
45
+ conversation_chain = ConversationalRetrievalChain.from_llm(
46
+ llm=llm,
47
+ retriever=vectorstore.as_retriever(),
48
+ memory=memory
49
+ )
50
+ return conversation_chain
 
 
 
 
 
 
 
 
51
 
52
  # Handle user input
53
  def handle_userinput(user_question):
54
  if st.session_state.conversation is not None:
55
  response = st.session_state.conversation({'question': user_question})
56
  st.session_state.chat_history = response['chat_history']
 
57
  for i, message in enumerate(st.session_state.chat_history):
58
  if i % 2 == 0:
59
  st.write(f"*User:* {message.content}")
 
64
 
65
  # Main function to run the Streamlit app
66
  def main():
 
67
  st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
 
68
  if "conversation" not in st.session_state:
69
  st.session_state.conversation = None
70
  if "chat_history" not in st.session_state:
 
77
 
78
  with st.sidebar:
79
  st.subheader("Your documents")
80
+ pdf_docs = st.file_uploader("Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
 
 
81
  if st.button("Process"):
82
  with st.spinner("Processing..."):
83
  raw_text = get_pdf_text(pdf_docs)