ZainabNac committed on
Commit
e1b7609
·
1 Parent(s): 2104ea8

Upload appchat.py

Browse files
Files changed (1)
  1. appchat.py +269 -0
appchat.py ADDED
@@ -0,0 +1,269 @@
+ # -*- coding: utf-8 -*-
+ """chatbot_with_memory (1).ipynb
+
+ Automatically generated by Colaboratory.
+
+ Original file is located at
+ https://colab.research.google.com/drive/1sIEqI5-wciuiYOdlEYwBkTPUIlvMEzkF
+ """
+
+ !pip install chromadb==0.4.6
+ !pip install pydantic==1.10
+ !pip install sentence-transformers
+
+ !pip install huggingface_hub
+
+ !pip install transformers
+
+ from langchain.document_loaders import TextLoader  # for text files
+ from langchain.text_splitter import CharacterTextSplitter  # text splitter
+ from langchain.embeddings import HuggingFaceEmbeddings  # for using HuggingFace models
+ from langchain.vectorstores import FAISS
+ from langchain.chains.question_answering import load_qa_chain
+ from langchain import HuggingFaceHub
+ from langchain.document_loaders import UnstructuredPDFLoader  # load PDFs
+ from langchain.indexes import VectorstoreIndexCreator  # vectorize db index with chromadb
+ from langchain.chains import RetrievalQA
+ from langchain.document_loaders import UnstructuredURLLoader  # load URLs into a document loader
+ import os
+ huggingfacehub_api_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
+
+
+ !pip install pypdf
+
+ from langchain.document_loaders import PyPDFLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+ # Load the PDF file from the current working directory
+ loader = PyPDFLoader("/content/Document sans titre (5).pdf")
+ # Split the PDF into pages
+ pages = loader.load_and_split()
+
+ # Define chunk size, overlap and separators
+ text_splitter = RecursiveCharacterTextSplitter(
+     chunk_size=128,
+     chunk_overlap=64,  # overlapping chunks preserve context across chunk boundaries
+     separators=['\n\n', '\n', '(?<=\. )', ' ', '']  # sentence-boundary lookbehind, fixed from '(?=>\. )'
+ )
+ docs = text_splitter.split_documents(pages)
+
+ from langchain.embeddings import HuggingFaceEmbeddings
+ embeddings = HuggingFaceEmbeddings()
+
+ !pip install faiss-gpu
+
+ # Create the vectorized db
+ # Vectorstore: https://python.langchain.com/en/latest/modules/indexes/vectorstores.html
+ from langchain.vectorstores import FAISS
+ db = FAISS.from_documents(docs, embeddings)
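+ # FAISS builds an in-memory index over the chunk embeddings; similarity_search below
+ # returns the chunks whose embeddings are closest to the query's embedding.
+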
+ llm = HuggingFaceHub(repo_id="google/flan-t5-xxl",
+                      model_kwargs={"temperature": 1, "max_length": 512, "max_new_tokens": 500})
+ chain = load_qa_chain(llm, chain_type="stuff")
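+ # load_qa_chain with chain_type="stuff" stuffs all supplied chunks into a single prompt;
+ # it is the simplest option, but can overflow the model's context window with many chunks.
+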
+ # QUERYING
+ query = "Which cities have faculties of medicine?"
+ docs = db.similarity_search(query)
+ chain.run(input_documents=docs, question=query)
+
+ from langchain.chains import RetrievalQA
+ qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff",
+                                  retriever=db.as_retriever(search_kwargs={"k": 3}))
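+ # search_kwargs={"k": 3} caps retrieval at the 3 most similar chunks per query.
+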
+ query = "Give me more information about the faculties of medicine."
+ qa.run(query)
+
+ query = "What is the meaning of Descriptive Data Analysis?"
+ qa.run(query)
+
+ repo_id = 'google/flan-t5-xxl'  # instruction-tuned T5: https://huggingface.co/google/flan-t5-xxl
+ llm = HuggingFaceHub(huggingfacehub_api_token=os.environ["HUGGINGFACEHUB_API_TOKEN"],
+                      repo_id=repo_id,
+                      model_kwargs={'temperature': 0.5, 'max_length': 256})
+
+ query1 = "Hello, I am Zaynab, I have some questions for you."
+ query2 = "I live in Marrakech. Do you know what country it is in?"
+ query3 = "What is my first name?"
+ query4 = "Where do I live?"
+
+ !pip install langchain --upgrade
+
+ from langchain import HuggingFaceHub
+ from langchain.chains import ConversationChain
+
+ """### Conversation Buffer Memory"""
+
+ from langchain.chains.conversation.memory import ConversationBufferMemory
+ # Adjust the import path accordingly
+ memory = ConversationBufferMemory()
+ conversation_buf = ConversationChain(
+     llm=llm,
+     memory=memory)
+
+ print("input: ", query1)
+ conversation_buf.predict(input=query1)
+
+ print("input: ", query2)
+ conversation_buf.predict(input=query2)
+
+ memory.load_memory_variables({})
+
+ print("input: ", query3)
+ conversation_buf.predict(input=query3)
+
+ print("input: ", query4)
+ conversation_buf.predict(input=query4)
+
+ print(memory.buffer)
+
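+ # ConversationBufferMemory keeps the full transcript verbatim, so facts stated early
+ # (the name, the city) remain visible to every later turn.
+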
+ """### Conversation Buffer Window Memory"""
+
+ from langchain.memory import ConversationBufferWindowMemory
+
+ memory2 = ConversationBufferWindowMemory(k=2)
+ conversation_buf2 = ConversationChain(
+     llm=llm,
+     memory=memory2
+ )
+
+ print("input: ", query1)
+ conversation_buf2.predict(input=query1)
+
+ print("input: ", query2)
+ conversation_buf2.predict(input=query2)
+
+ print("input: ", query3)
+ conversation_buf2.predict(input=query3)
+
+ print(memory2.buffer)
+
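+ # With k=2 the window memory retains only the two most recent exchanges;
+ # older turns are silently dropped as the conversation grows.
+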
+ """### Conversation Summary Memory"""
+
+ from langchain.memory import ConversationSummaryBufferMemory
+
+ memory3 = ConversationSummaryBufferMemory(llm=llm, max_token_limit=80)
+ conversation_buf3 = ConversationChain(
+     llm=llm,
+     memory=memory3
+ )
+
+ print("input: ", query1)
+ conversation_buf3.predict(input=query1)
+
+ print("input: ", query2)
+ conversation_buf3.predict(input=query2)
+
+ print("input: ", query3)
+ conversation_buf3.predict(input=query3)
+
+ memory3.load_memory_variables({})
+
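+ # ConversationSummaryBufferMemory keeps recent turns verbatim and, once the buffer
+ # exceeds max_token_limit tokens, uses the llm to fold older turns into a running summary.
+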
+ """### Chat PDF with Memory
+
+ Newer versions of the Pydantic package (a dependency of chromadb) introduced breaking
+ changes that leave chromadb incompatible. Possible solutions are discussed in
+ [import error chromadb](https://github.com/langchain-ai/langchain/issues/1957), or pin
+ specific versions of chromadb and pydantic until the bug is resolved.
+
+ [Embedded screenshot omitted.]
+ """
+
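+ # One workaround, using the same pins as at the top of this notebook:
+ # !pip install chromadb==0.4.6 pydantic==1.10
+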
+ !pip install pypdf
+
+ import langchain
+ import chromadb
+
+ import os
+ import getpass
+
+ from langchain.document_loaders import PyPDFLoader  # document loader: https://python.langchain.com/docs/modules/data_connection/document_loaders
+ from langchain.text_splitter import RecursiveCharacterTextSplitter  # document transformer: text splitter for chunking
+ from langchain.embeddings import HuggingFaceEmbeddings
+ from langchain import PromptTemplate
+ from langchain.vectorstores import Chroma  # vector store
+ from langchain import HuggingFaceHub  # model hub
+ from langchain.chains import RetrievalQA
+
+ from langchain.memory import ConversationBufferMemory
+
+ # Load the API key (os and getpass are already imported above)
+ os.environ['HUGGING_FACE_HUB_API_KEY'] = getpass.getpass('Hugging Face API key:')
+
+ path = input("Enter PDF file path: ")  # e.g. "C:/Users/Sourav/Downloads/pdf"
+ loader = PyPDFLoader(path)
+ pages = loader.load()
+
+ # number of pages
+ len(pages)
+
+ splitter = RecursiveCharacterTextSplitter(chunk_size=256, chunk_overlap=10)
+ docs = splitter.split_documents(pages)
+
+ # These are text chunks, not tokens; count and print them
+ chunks = docs
+ num_chunks = len(chunks)
+ print("Number of chunks:", num_chunks)
+
+ for chunk in chunks:
+     print(chunk)
+
+ embeddings = HuggingFaceEmbeddings()
+ doc_search = Chroma.from_documents(docs, embeddings)
+
+ print(doc_search)
+
+ query = "Which faculties exist?"
+ similar_docs = doc_search.similarity_search(query, k=3)
+
+ print(similar_docs)
+
+ query = "Give me information about the École Nationale de l'Industrie Minérale."
+ similar_docs = doc_search.similarity_search(query, k=10)
+
+ repo_id = 'google/flan-t5-xxl'  # instruction-tuned T5: https://huggingface.co/google/flan-t5-xxl
+ llm = HuggingFaceHub(huggingfacehub_api_token=os.environ['HUGGING_FACE_HUB_API_KEY'],
+                      repo_id=repo_id,
+                      model_kwargs={'temperature': 1, 'max_length': 512, 'max_new_tokens': 500})
+
+ template = """
+ Use the following context (delimited by <ctx></ctx>) and the chat history (delimited by <hs></hs>) to answer the question:
+ ------
+ <ctx>
+ {context}
+ </ctx>
+ ------
+ <hs>
+ {history}
+ </hs>
+ ------
+ {question}
+ Answer:
+ """
+ prompt = PromptTemplate(
+     input_variables=["history", "context", "question"],
+     template=template,
+ )
+
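+ # input_variables must name exactly the placeholders used in the template string:
+ # {history} is filled by the memory, {context} by the retriever, {question} by the user.
+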
+ memory = ConversationBufferMemory(
+     memory_key="history",
+     input_key="question"
+ )
+
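+ # memory_key="history" routes the saved transcript into the {history} placeholder, and
+ # input_key="question" tells the memory which chain input to record as the user turn.
+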
+ retrieval_chain = RetrievalQA.from_chain_type(llm,
+                                               chain_type='stuff',
+                                               retriever=doc_search.as_retriever(),
+                                               chain_type_kwargs={
+                                                   "prompt": prompt,
+                                                   "memory": memory
+                                               })
+
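+ # RetrievalQA takes no memory argument of its own here; passing the prompt and memory
+ # through chain_type_kwargs wires them into the underlying "stuff" QA chain instead.
+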
+ query = "Give me the cities of these faculties of medicine."
+ retrieval_chain.run(query)
+
+ query = "Give me information about the faculties of medicine."
+ retrieval_chain.run(query)
+
+
+ memory.load_memory_variables({})