Spaces:
Sleeping
Sleeping
Commit ·
7a2d0fe
1
Parent(s): 011d448
tested
Browse files
app.py
CHANGED
|
@@ -17,7 +17,6 @@ import textract
|
|
| 17 |
st.set_page_config(page_title="chatbot")
|
| 18 |
st.title("Chat with Documents")
|
| 19 |
|
| 20 |
-
|
| 21 |
num_of_top_selection = 3
|
| 22 |
CHUNK_SIZE = 500
|
| 23 |
CHUNK_OVERLAP = 50
|
|
@@ -63,8 +62,7 @@ def get_text_from_docx(docx):
|
|
| 63 |
return text
|
| 64 |
|
| 65 |
def get_text_from_text_file(text_file):
|
| 66 |
-
|
| 67 |
-
text = file.read()
|
| 68 |
return text
|
| 69 |
|
| 70 |
def get_text_from_other_file(file_path):
|
|
@@ -83,7 +81,7 @@ def load_documents(docs):
|
|
| 83 |
elif doc.name.lower().endswith('.docx'):
|
| 84 |
text += get_text_from_docx(doc)
|
| 85 |
elif doc.name.lower().endswith(('.txt', '.md')):
|
| 86 |
-
text += get_text_from_text_file(doc)
|
| 87 |
else:
|
| 88 |
# Handle other file types, you can extend this as needed
|
| 89 |
text += get_text_from_other_file(doc)
|
|
@@ -128,7 +126,7 @@ def input_fields():
|
|
| 128 |
# st.text_input("Pinecone environment")
|
| 129 |
st.session_state.pinecone_index = index_name
|
| 130 |
# st.text_input("Pinecone index name")
|
| 131 |
-
st.session_state.source_docs = st.file_uploader(label="Upload Documents",
|
| 132 |
#
|
| 133 |
|
| 134 |
|
|
@@ -137,7 +135,8 @@ def process_documents():
|
|
| 137 |
if not st.session_state.pinecone_api_key or not st.session_state.pinecone_env or not st.session_state.pinecone_index or not st.session_state.source_docs:
|
| 138 |
st.warning(f"Please upload the documents and provide the missing fields.")
|
| 139 |
else:
|
| 140 |
-
try:
|
|
|
|
| 141 |
# for source_doc in st.session_state.source_docs:
|
| 142 |
if st.session_state.source_docs:
|
| 143 |
#
|
|
@@ -149,8 +148,8 @@ def process_documents():
|
|
| 149 |
texts = split_documents(documents)
|
| 150 |
#
|
| 151 |
st.session_state.retriever = embeddings_on_pinecone(texts)
|
| 152 |
-
except Exception as e:
|
| 153 |
-
|
| 154 |
|
| 155 |
def boot():
|
| 156 |
#
|
|
|
|
| 17 |
st.set_page_config(page_title="chatbot")
|
| 18 |
st.title("Chat with Documents")
|
| 19 |
|
|
|
|
| 20 |
num_of_top_selection = 3
|
| 21 |
CHUNK_SIZE = 500
|
| 22 |
CHUNK_OVERLAP = 50
|
|
|
|
| 62 |
return text
|
| 63 |
|
| 64 |
def get_text_from_text_file(text_file):
|
| 65 |
+
text = text_file.read()
|
|
|
|
| 66 |
return text
|
| 67 |
|
| 68 |
def get_text_from_other_file(file_path):
|
|
|
|
| 81 |
elif doc.name.lower().endswith('.docx'):
|
| 82 |
text += get_text_from_docx(doc)
|
| 83 |
elif doc.name.lower().endswith(('.txt', '.md')):
|
| 84 |
+
text += str(get_text_from_text_file(doc))
|
| 85 |
else:
|
| 86 |
# Handle other file types, you can extend this as needed
|
| 87 |
text += get_text_from_other_file(doc)
|
|
|
|
| 126 |
# st.text_input("Pinecone environment")
|
| 127 |
st.session_state.pinecone_index = index_name
|
| 128 |
# st.text_input("Pinecone index name")
|
| 129 |
+
st.session_state.source_docs = st.file_uploader(label="Upload Documents", accept_multiple_files=True)
|
| 130 |
#
|
| 131 |
|
| 132 |
|
|
|
|
| 135 |
if not st.session_state.pinecone_api_key or not st.session_state.pinecone_env or not st.session_state.pinecone_index or not st.session_state.source_docs:
|
| 136 |
st.warning(f"Please upload the documents and provide the missing fields.")
|
| 137 |
else:
|
| 138 |
+
# try:
|
| 139 |
+
if True:
|
| 140 |
# for source_doc in st.session_state.source_docs:
|
| 141 |
if st.session_state.source_docs:
|
| 142 |
#
|
|
|
|
| 148 |
texts = split_documents(documents)
|
| 149 |
#
|
| 150 |
st.session_state.retriever = embeddings_on_pinecone(texts)
|
| 151 |
+
# except Exception as e:
|
| 152 |
+
# st.error(f"An error occurred: {e}")
|
| 153 |
|
| 154 |
def boot():
|
| 155 |
#
|