Spaces:
Sleeping
Sleeping
Commit ·
24b6b59
1
Parent(s): 57a8908
normalized fixed + in header text as well
Browse files
- index_retriever.py +6 -2
index_retriever.py
CHANGED
|
@@ -20,8 +20,12 @@ def create_vector_index(documents):
|
|
| 20 |
# Normalize text content for BM25
|
| 21 |
if hasattr(doc, 'text'):
|
| 22 |
from documents_prep import normalize_connection_type
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
processed_docs.append(doc)
|
| 26 |
|
| 27 |
if doc.metadata.get('type') == 'table':
|
|
|
|
| 20 |
# Normalize text content for BM25
|
| 21 |
if hasattr(doc, 'text'):
|
| 22 |
from documents_prep import normalize_connection_type
|
| 23 |
+
normalized_text = normalize_connection_type(doc.text)
|
| 24 |
+
# Create a new Document with normalized text and same metadata
|
| 25 |
+
doc = Document(
|
| 26 |
+
text=normalized_text,
|
| 27 |
+
metadata=doc.metadata
|
| 28 |
+
)
|
| 29 |
processed_docs.append(doc)
|
| 30 |
|
| 31 |
if doc.metadata.get('type') == 'table':
|