Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -178,15 +178,37 @@ def getRAGChain(customerName,customerDistrict, custDetailsPresent,vectordb):
|
|
| 178 |
)
|
| 179 |
return chain
|
| 180 |
|
| 181 |
-
def createVectorDB(documents):
|
| 182 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
|
| 183 |
-
texts =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
print("All chunk List START ***********************\n\n")
|
| 185 |
pretty_print_docs(texts)
|
| 186 |
print("All chunk List END ***********************\n\n")
|
| 187 |
-
embeddings = getEmbeddingModel(
|
| 188 |
-
|
|
|
|
|
|
|
| 189 |
return vectordb
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
|
| 191 |
def createPrompt(cName, cCity, custDetailsPresent):
|
| 192 |
cProfile = "Customer's Name is " + cName + "\nCustomer's lives in or customer's Resident State or Customer's place is " + cCity + "\n"
|
|
@@ -213,7 +235,7 @@ def createPrompt(cName, cCity, custDetailsPresent):
|
|
| 213 |
PROMPT = PromptTemplate(template=prompt_template, input_variables=["history", "context", "question"])
|
| 214 |
return PROMPT
|
| 215 |
|
| 216 |
-
vectordb = createVectorDB(loadKB(False, False, uploads_dir, None))
|
| 217 |
|
| 218 |
@app.route('/', methods=['GET'])
|
| 219 |
def test():
|
|
@@ -287,7 +309,7 @@ def file_Upload():
|
|
| 287 |
embeddingModelID = int(request.form.getlist('embeddingModelID')[0])
|
| 288 |
global vectordb
|
| 289 |
vectordb = createVectorDB(documents, embeddingModelID)
|
| 290 |
-
vectordb=createVectorDB(documents)
|
| 291 |
return render_template("index.html")
|
| 292 |
|
| 293 |
if __name__ == '__main__':
|
|
|
|
| 178 |
)
|
| 179 |
return chain
|
| 180 |
|
| 181 |
+
def createVectorDB(documents,embeddingModelID):
    """Chunk ``documents`` by token count and index them in a Chroma store.

    Each document's ``page_content`` is tokenized with the module-level
    ``tokenizer``. Documents above the token limit are split with a
    ``RecursiveCharacterTextSplitter`` (chunk_size=1500, chunk_overlap=150);
    documents at or below the limit are indexed whole. Documents that
    tokenize to one token or fewer are skipped.

    Args:
        documents: Iterable of document objects exposing ``page_content``.
        embeddingModelID: Identifier handed to ``getEmbeddingModel`` to
            select the embedding model.

    Returns:
        The vector store returned by ``Chroma.from_documents``, created with
        the ``hnsw:space`` collection metadata set to ``cosine``.
    """
    max_tokens_per_doc = 1000
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)

    texts = []
    for document in documents:
        token_count = len(tokenizer.tokenize(document.page_content))
        print("Token Count::::::::::" + str(token_count))
        if token_count > max_tokens_per_doc:
            print("Splitting Content using RTS")
            texts.extend(text_splitter.split_documents([document]))
        elif token_count > 1:
            # Includes token_count == max_tokens_per_doc: the earlier strict
            # `< 1000` test let documents of exactly 1000 tokens fall through
            # both branches and vanish from the index.
            texts.append(document)

    print("All chunk List START ***********************\n\n")
    pretty_print_docs(texts)
    print("All chunk List END ***********************\n\n")

    embeddings = getEmbeddingModel(embeddingModelID)
    print("Embedding Started >>>>>>>>>>>>>>>>>>", datetime.now().strftime("%H:%M:%S"))
    vectordb = Chroma.from_documents(texts, embeddings, collection_metadata={"hnsw:space": "cosine"})
    print("Vector Store Creation Completed*********************************\n\n")
    return vectordb
|
| 205 |
+
# texts = text_splitter.split_documents(documents)
|
| 206 |
+
# print("All chunk List START ***********************\n\n")
|
| 207 |
+
# pretty_print_docs(texts)
|
| 208 |
+
# print("All chunk List END ***********************\n\n")
|
| 209 |
+
# embeddings = getEmbeddingModel(0)
|
| 210 |
+
# vectordb = Chroma.from_documents(texts, embeddings)
|
| 211 |
+
# return vectordb
|
| 212 |
|
| 213 |
def createPrompt(cName, cCity, custDetailsPresent):
|
| 214 |
cProfile = "Customer's Name is " + cName + "\nCustomer's lives in or customer's Resident State or Customer's place is " + cCity + "\n"
|
|
|
|
| 235 |
PROMPT = PromptTemplate(template=prompt_template, input_variables=["history", "context", "question"])
|
| 236 |
return PROMPT
|
| 237 |
|
| 238 |
+
# Module-level vector store, built once when this module is loaded, using the
# default embedding model. NOTE(review): presumably loadKB reads the knowledge
# base from uploads_dir; the meaning of its flag arguments is not visible
# here - confirm against loadKB.
vectordb = createVectorDB(loadKB(False, False, uploads_dir, None),defaultEmbeddingModelID)
|
| 239 |
|
| 240 |
@app.route('/', methods=['GET'])
|
| 241 |
def test():
|
|
|
|
| 309 |
embeddingModelID = int(request.form.getlist('embeddingModelID')[0])
|
| 310 |
global vectordb
|
| 311 |
vectordb = createVectorDB(documents, embeddingModelID)
|
| 312 |
+
#vectordb=createVectorDB(documents)
|
| 313 |
return render_template("index.html")
|
| 314 |
|
| 315 |
if __name__ == '__main__':
|