Update src/ingestion.py
Browse files- src/ingestion.py +1 -1
src/ingestion.py
CHANGED
|
@@ -250,7 +250,7 @@ def chunk_text(text: str, chunk_size: int = None, overlap: int = None) -> list:
|
|
| 250 |
|
| 251 |
# --- 🧩 Detect procedural sections (new) ---
|
| 252 |
procedure_blocks = re.split(
|
| 253 |
-
r"(?=\n\d+\.\d+\s+(?:Create|Configure|Set\s*up|Setup|Steps?|Process|Procedure|Integration|Replication|Connection|Mapping|Restrictions?|
|
| 254 |
)
|
| 255 |
chunks = []
|
| 256 |
|
|
|
|
| 250 |
|
| 251 |
# --- 🧩 Detect procedural sections (new) ---
|
| 252 |
procedure_blocks = re.split(
|
| 253 |
+
r"(?=(?:\s*\n|\s+)\d+\.\d+\s+(?:Create|Configure|Set\s*up|Setup|Steps?|Process|Procedure|Integration|Replication|Connection|Mapping|Restrictions?|Limitations?|Prerequisites?|Considerations?|Guidelines?|Notes?|Cautions?|Recommendations?)\b)", text, flags=re.IGNORECASE
|
| 254 |
)
|
| 255 |
chunks = []
|
| 256 |
|