samim2024 committed on
Commit
a3bb3ff
·
verified ·
1 Parent(s): 2134f82

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -46
app.py CHANGED
@@ -1,37 +1,33 @@
1
-
2
- from langchain.chains import RetrievalQA
3
- from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
4
- from langchain.callbacks.manager import CallbackManager
5
- #from langchain_community.llms import Ollama
6
- #from langchain_community.embeddings.ollama import OllamaEmbeddings
7
- from langchain_community.vectorstores import Chroma
8
- from langchain.text_splitter import RecursiveCharacterTextSplitter
9
- from langchain_community.document_loaders import PyPDFLoader
10
- from langchain.prompts import PromptTemplate
11
- from langchain.memory import ConversationBufferMemory
12
  import streamlit as st
13
  import os
14
  import time
15
  from langchain_community.llms import HuggingFaceEndpoint
16
-
17
-
18
  from langchain_community.embeddings import HuggingFaceEmbeddings
 
 
 
 
 
 
 
 
19
 
 
20
  model_name = "sentence-transformers/all-mpnet-base-v2"
21
  model_kwargs = {'device': 'cpu'}
22
  encode_kwargs = {'normalize_embeddings': False}
 
23
  embeddings = HuggingFaceEmbeddings(
24
  model_name=model_name,
25
  model_kwargs=model_kwargs,
26
  encode_kwargs=encode_kwargs
27
  )
28
 
29
- if not os.path.exists('files'):
30
- os.mkdir('files')
31
-
32
- if not os.path.exists('jj'):
33
- os.mkdir('jj')
34
 
 
35
  if 'template' not in st.session_state:
36
  st.session_state.template = """You are a knowledgeable chatbot, here to help with questions of the user. Your tone should be professional and informative.Try to give answer in tabular and shortcut.
37
 
@@ -51,51 +47,39 @@ if 'memory' not in st.session_state:
51
  return_messages=True,
52
  input_key="question")
53
  if 'vectorstore' not in st.session_state:
54
- #st.session_state.vectorstore = Chroma(persist_directory='jj', embedding_function=OllamaEmbeddings(base_url='http://localhost:11434',model="mistral")
55
- st.session_state.vectorstore = Chroma(persist_directory='jj', embedding_function=embeddings)
56
-
57
  if 'llm' not in st.session_state:
58
- #st.session_state.llm = Ollama(base_url="http://localhost:11434",model="mistral",verbose=True,callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),)
59
  st.session_state.llm = HuggingFaceEndpoint(repo_id="mistralai/Mistral-7B-Instruct-v0.2", Temperature=0.9)
60
-
61
- # Initialize session state
62
  if 'chat_history' not in st.session_state:
63
  st.session_state.chat_history = []
64
 
65
  st.title("PDF Chatbot")
66
 
67
- # Upload a PDF file
68
  uploaded_file = st.file_uploader("Upload your PDF", type='pdf')
69
-
70
  for message in st.session_state.chat_history:
71
  with st.chat_message(message["role"]):
72
  st.markdown(message["message"])
73
 
74
  if uploaded_file is not None:
75
- if not os.path.isfile("files/"+uploaded_file.name+".pdf"):
 
76
  with st.status("Analyzing your document..."):
77
  bytes_data = uploaded_file.read()
78
- f = open("files/"+uploaded_file.name+".pdf", "wb")
79
- f.write(bytes_data)
80
- f.close()
81
- loader = PyPDFLoader("files/"+uploaded_file.name+".pdf")
82
  data = loader.load()
83
 
84
- # Initialize text splitter
85
- text_splitter = RecursiveCharacterTextSplitter(
86
- chunk_size=1500,
87
- chunk_overlap=0,
88
- length_function=len
89
- )
90
  all_splits = text_splitter.split_documents(data)
91
 
92
- # Create and persist the vector store
93
- #st.session_state.vectorstore = Chroma.from_documents(documents=all_splits,embedding=OllamaEmbeddings(model="mistral"))
94
- st.session_state.vectorstore = Chroma.from_documents(documents=all_splits,embedding=embeddings)
95
  st.session_state.vectorstore.persist()
96
 
97
  st.session_state.retriever = st.session_state.vectorstore.as_retriever()
98
- # Initialize the QA chain
99
  if 'qa_chain' not in st.session_state:
100
  st.session_state.qa_chain = RetrievalQA.from_chain_type(
101
  llm=st.session_state.llm,
@@ -109,7 +93,6 @@ if uploaded_file is not None:
109
  }
110
  )
111
 
112
- # Chat input
113
  if user_input := st.chat_input("You:", key="user_input"):
114
  user_message = {"role": "user", "message": user_input}
115
  st.session_state.chat_history.append(user_message)
@@ -123,13 +106,10 @@ if uploaded_file is not None:
123
  for chunk in response['result'].split():
124
  full_response += chunk + " "
125
  time.sleep(0.05)
126
- # Add a blinking cursor to simulate typing
127
  message_placeholder.markdown(full_response + "▌")
128
  message_placeholder.markdown(full_response)
129
 
130
  chatbot_message = {"role": "assistant", "message": response['result']}
131
  st.session_state.chat_history.append(chatbot_message)
132
-
133
-
134
  else:
135
- st.write("Please upload a PDF... file.")
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import os
3
  import time
4
  from langchain_community.llms import HuggingFaceEndpoint
 
 
5
  from langchain_community.embeddings import HuggingFaceEmbeddings
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain_community.document_loaders import PyPDFLoader
8
+ from langchain_community.vectorstores import Chroma
9
+ from langchain.prompts import PromptTemplate
10
+ from langchain.memory import ConversationBufferMemory
11
+ from langchain.chains import RetrievalQA
12
+ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
13
+ from langchain.callbacks.manager import CallbackManager
14
 
15
+ # Model and Embedding Configuration
16
  model_name = "sentence-transformers/all-mpnet-base-v2"
17
  model_kwargs = {'device': 'cpu'}
18
  encode_kwargs = {'normalize_embeddings': False}
19
+
20
  embeddings = HuggingFaceEmbeddings(
21
  model_name=model_name,
22
  model_kwargs=model_kwargs,
23
  encode_kwargs=encode_kwargs
24
  )
25
 
26
+ # Directory setup
27
+ os.makedirs('files', exist_ok=True)
28
+ os.makedirs('jj', exist_ok=True)
 
 
29
 
30
+ # Streamlit session state setup
31
  if 'template' not in st.session_state:
32
  st.session_state.template = """You are a knowledgeable chatbot, here to help with questions of the user. Your tone should be professional and informative.Try to give answer in tabular and shortcut.
33
 
 
47
  return_messages=True,
48
  input_key="question")
49
  if 'vectorstore' not in st.session_state:
50
+ # Proper embedding configuration, avoids meta tensor errors
51
+ st.session_state.vectorstore = Chroma(persist_directory='jj', embedding_function=embeddings)
52
+
53
  if 'llm' not in st.session_state:
 
54
  st.session_state.llm = HuggingFaceEndpoint(repo_id="mistralai/Mistral-7B-Instruct-v0.2", Temperature=0.9)
55
+
 
56
  if 'chat_history' not in st.session_state:
57
  st.session_state.chat_history = []
58
 
59
  st.title("PDF Chatbot")
60
 
 
61
  uploaded_file = st.file_uploader("Upload your PDF", type='pdf')
 
62
  for message in st.session_state.chat_history:
63
  with st.chat_message(message["role"]):
64
  st.markdown(message["message"])
65
 
66
  if uploaded_file is not None:
67
+ file_path = os.path.join("files", uploaded_file.name + ".pdf")
68
+ if not os.path.isfile(file_path):
69
  with st.status("Analyzing your document..."):
70
  bytes_data = uploaded_file.read()
71
+ with open(file_path, "wb") as f:
72
+ f.write(bytes_data)
73
+ loader = PyPDFLoader(file_path)
 
74
  data = loader.load()
75
 
76
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=0, length_function=len)
 
 
 
 
 
77
  all_splits = text_splitter.split_documents(data)
78
 
79
+ st.session_state.vectorstore = Chroma.from_documents(documents=all_splits, embedding=embeddings)
 
 
80
  st.session_state.vectorstore.persist()
81
 
82
  st.session_state.retriever = st.session_state.vectorstore.as_retriever()
 
83
  if 'qa_chain' not in st.session_state:
84
  st.session_state.qa_chain = RetrievalQA.from_chain_type(
85
  llm=st.session_state.llm,
 
93
  }
94
  )
95
 
 
96
  if user_input := st.chat_input("You:", key="user_input"):
97
  user_message = {"role": "user", "message": user_input}
98
  st.session_state.chat_history.append(user_message)
 
106
  for chunk in response['result'].split():
107
  full_response += chunk + " "
108
  time.sleep(0.05)
 
109
  message_placeholder.markdown(full_response + "▌")
110
  message_placeholder.markdown(full_response)
111
 
112
  chatbot_message = {"role": "assistant", "message": response['result']}
113
  st.session_state.chat_history.append(chatbot_message)
 
 
114
  else:
115
+ st.write("Please upload a PDF... file.")