Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,7 +4,7 @@ import langchain
|
|
| 4 |
from langchain_huggingface import HuggingFaceEmbeddings
|
| 5 |
from langchain_community.document_loaders import UnstructuredPDFLoader,UnstructuredWordDocumentLoader
|
| 6 |
from langchain.indexes import VectorstoreIndexCreator
|
| 7 |
-
from langchain_community.vectorstores import FAISS
|
| 8 |
from zipfile import ZipFile
|
| 9 |
import gradio as gr
|
| 10 |
import openpyxl
|
|
@@ -136,7 +136,7 @@ def merge_txt_to_db(filename,db,progress,progress_step=0.1):
|
|
| 136 |
progress(progress_step,'txt unpacked')
|
| 137 |
return merge_split_docs_to_db(split_docs,db,progress,progress_step)
|
| 138 |
|
| 139 |
-
def unpack_zip_file(filename:str,db:FAISS,progress):
|
| 140 |
with ZipFile(filename, 'r') as zipObj:
|
| 141 |
contents = zipObj.namelist()
|
| 142 |
print(f"unpack zip: contents: {contents}")
|
|
@@ -144,7 +144,7 @@ def unpack_zip_file(filename:str,db:FAISS,progress):
|
|
| 144 |
shutil.unpack_archive(filename, tmp_directory)
|
| 145 |
|
| 146 |
if 'index.faiss' in [item.lower() for item in contents]:
|
| 147 |
-
db2 = FAISS.load_local(tmp_directory, embeddings, allow_dangerous_deserialization=True)
|
| 148 |
db.merge_from(db2)
|
| 149 |
return db
|
| 150 |
|
|
@@ -179,11 +179,11 @@ def embed_files(files,ui_session_id,progress=gr.Progress(),progress_step=0.05):
|
|
| 179 |
session_id = f"PDFAISS-{ui_session_id}"
|
| 180 |
|
| 181 |
try:
|
| 182 |
-
db = FAISS.load_local(session_id,embeddings, allow_dangerous_deserialization=True)
|
| 183 |
except:
|
| 184 |
print(f"SESSION: {session_id} database does not exist, create a FAISS db")
|
| 185 |
#db = FAISS.from_documents([foo], embeddings)
|
| 186 |
-
db = FAISS.from_texts(["foo is fou!"],embeddings,[{"source":"foo source"}])
|
| 187 |
db.save_local(session_id)
|
| 188 |
print(f"SESSION: {session_id} database created")
|
| 189 |
|
|
@@ -222,7 +222,7 @@ def embed_files(files,ui_session_id,progress=gr.Progress(),progress_step=0.05):
|
|
| 222 |
|
| 223 |
### load the updated db and zip it ###
|
| 224 |
progress(progress_step, desc = 'loading db')
|
| 225 |
-
db = FAISS.load_local(session_id,embeddings, allow_dangerous_deserialization=True)
|
| 226 |
print("EMBEDDED, after embeddeding: ",session_id,len(db.index_to_docstore_id))
|
| 227 |
progress(progress_step, desc = 'zipping db for download')
|
| 228 |
add_files_to_zip(session_id)
|
|
@@ -316,7 +316,7 @@ def ask_gpt(query, ui_session_id, history):
|
|
| 316 |
return "Please Login", "", ""
|
| 317 |
session_id = f"PDFAISS-{ui_session_id}"
|
| 318 |
try:
|
| 319 |
-
db = FAISS.load_local(session_id,embeddings, allow_dangerous_deserialization=True)
|
| 320 |
print("ASKGPT after loading",session_id,len(db.index_to_docstore_id))
|
| 321 |
except:
|
| 322 |
print(f"SESSION: {session_id} database does not exist")
|
|
|
|
| 4 |
from langchain_huggingface import HuggingFaceEmbeddings
|
| 5 |
from langchain_community.document_loaders import UnstructuredPDFLoader,UnstructuredWordDocumentLoader
|
| 6 |
from langchain.indexes import VectorstoreIndexCreator
|
| 7 |
+
from langchain_community.vectorstores import LangChainFAISS
|
| 8 |
from zipfile import ZipFile
|
| 9 |
import gradio as gr
|
| 10 |
import openpyxl
|
|
|
|
| 136 |
progress(progress_step,'txt unpacked')
|
| 137 |
return merge_split_docs_to_db(split_docs,db,progress,progress_step)
|
| 138 |
|
| 139 |
+
def unpack_zip_file(filename:str,db:LangChainFAISS,progress):
|
| 140 |
with ZipFile(filename, 'r') as zipObj:
|
| 141 |
contents = zipObj.namelist()
|
| 142 |
print(f"unpack zip: contents: {contents}")
|
|
|
|
| 144 |
shutil.unpack_archive(filename, tmp_directory)
|
| 145 |
|
| 146 |
if 'index.faiss' in [item.lower() for item in contents]:
|
| 147 |
+
db2 = LangChainFAISS.load_local(tmp_directory, embeddings, allow_dangerous_deserialization=True)
|
| 148 |
db.merge_from(db2)
|
| 149 |
return db
|
| 150 |
|
|
|
|
| 179 |
session_id = f"PDFAISS-{ui_session_id}"
|
| 180 |
|
| 181 |
try:
|
| 182 |
+
db = LangChainFAISS.load_local(session_id,embeddings, allow_dangerous_deserialization=True)
|
| 183 |
except:
|
| 184 |
print(f"SESSION: {session_id} database does not exist, create a FAISS db")
|
| 185 |
#db = FAISS.from_documents([foo], embeddings)
|
| 186 |
+
db = LangChainFAISS.from_texts(["foo is fou!"],embeddings,[{"source":"foo source"}])
|
| 187 |
db.save_local(session_id)
|
| 188 |
print(f"SESSION: {session_id} database created")
|
| 189 |
|
|
|
|
| 222 |
|
| 223 |
### load the updated db and zip it ###
|
| 224 |
progress(progress_step, desc = 'loading db')
|
| 225 |
+
db = LangChainFAISS.load_local(session_id,embeddings, allow_dangerous_deserialization=True)
|
| 226 |
print("EMBEDDED, after embeddeding: ",session_id,len(db.index_to_docstore_id))
|
| 227 |
progress(progress_step, desc = 'zipping db for download')
|
| 228 |
add_files_to_zip(session_id)
|
|
|
|
| 316 |
return "Please Login", "", ""
|
| 317 |
session_id = f"PDFAISS-{ui_session_id}"
|
| 318 |
try:
|
| 319 |
+
db = LangChainFAISS.load_local(session_id,embeddings, allow_dangerous_deserialization=True)
|
| 320 |
print("ASKGPT after loading",session_id,len(db.index_to_docstore_id))
|
| 321 |
except:
|
| 322 |
print(f"SESSION: {session_id} database does not exist")
|