Spaces:
Runtime error
Runtime error
Commit
·
2e4daca
1
Parent(s):
e2fe55a
made corrections for preprocessing
Browse files
- app.py +3 -2
- utils/haystack.py +1 -1
app.py
CHANGED
|
@@ -8,7 +8,7 @@ from json import JSONDecodeError
|
|
| 8 |
from markdown import markdown
|
| 9 |
from utils.config import parser
|
| 10 |
from utils.haystack import start_document_store, start_haystack_extractive, start_haystack_rag, query, start_preprocessor_node, start_retriever, start_reader
|
| 11 |
-
from utils.ui import reset_results, set_initial_state
|
| 12 |
|
| 13 |
# Sliders
|
| 14 |
DEFAULT_DOCS_FROM_RETRIEVER = int(os.getenv("DEFAULT_DOCS_FROM_RETRIEVER", "3"))
|
|
@@ -34,7 +34,7 @@ def upload_files():
|
|
| 34 |
|
| 35 |
def process_file(data_file, preprocesor, document_store):
|
| 36 |
# read file and add content
|
| 37 |
-
file_contents = data_file.read()
|
| 38 |
docs = [{
|
| 39 |
'content': str(file_contents),
|
| 40 |
'meta': {'name': str(data_file.name)}
|
|
@@ -47,6 +47,7 @@ def process_file(data_file, preprocesor, document_store):
|
|
| 47 |
print(f"{data_file.name} already processed")
|
| 48 |
else:
|
| 49 |
print(f'preprocessing uploaded doc {data_file.name}.......')
|
|
|
|
| 50 |
preprocessed_docs = preprocesor.process(docs)
|
| 51 |
print('writing to document store.......')
|
| 52 |
document_store.write_documents(preprocessed_docs)
|
|
|
|
| 8 |
from markdown import markdown
|
| 9 |
from utils.config import parser
|
| 10 |
from utils.haystack import start_document_store, start_haystack_extractive, start_haystack_rag, query, start_preprocessor_node, start_retriever, start_reader
|
| 11 |
+
from utils.ui import reset_results, set_initial_state
|
| 12 |
|
| 13 |
# Sliders
|
| 14 |
DEFAULT_DOCS_FROM_RETRIEVER = int(os.getenv("DEFAULT_DOCS_FROM_RETRIEVER", "3"))
|
|
|
|
| 34 |
|
| 35 |
def process_file(data_file, preprocesor, document_store):
|
| 36 |
# read file and add content
|
| 37 |
+
file_contents = data_file.read().decode("utf-8")
|
| 38 |
docs = [{
|
| 39 |
'content': str(file_contents),
|
| 40 |
'meta': {'name': str(data_file.name)}
|
|
|
|
| 47 |
print(f"{data_file.name} already processed")
|
| 48 |
else:
|
| 49 |
print(f'preprocessing uploaded doc {data_file.name}.......')
|
| 50 |
+
#print(data_file.read().decode("utf-8"))
|
| 51 |
preprocessed_docs = preprocesor.process(docs)
|
| 52 |
print('writing to document store.......')
|
| 53 |
document_store.write_documents(preprocessed_docs)
|
utils/haystack.py
CHANGED
|
@@ -13,7 +13,7 @@ from milvus_haystack import MilvusDocumentStore
|
|
| 13 |
def start_preprocessor_node():
|
| 14 |
print('initializing preprocessor node')
|
| 15 |
processor = PreProcessor(
|
| 16 |
-
clean_empty_lines=True,
|
| 17 |
clean_whitespace=True,
|
| 18 |
clean_header_footer=True,
|
| 19 |
#remove_substrings=None,
|
|
|
|
| 13 |
def start_preprocessor_node():
|
| 14 |
print('initializing preprocessor node')
|
| 15 |
processor = PreProcessor(
|
| 16 |
+
clean_empty_lines= True,
|
| 17 |
clean_whitespace=True,
|
| 18 |
clean_header_footer=True,
|
| 19 |
#remove_substrings=None,
|