Shubham170793 committed on
Commit
dd1dffd
·
verified ·
1 Parent(s): 7e17840

Update src/ingestion.py

Browse files
Files changed (1) hide show
  1. src/ingestion.py +1 -1
src/ingestion.py CHANGED
@@ -250,7 +250,7 @@ def chunk_text(text: str, chunk_size: int = None, overlap: int = None) -> list:
250
 
251
  # --- 🧩 Detect procedural sections (new) ---
252
  procedure_blocks = re.split(
253
- r"(?=\n\d+\.\d+\s+(?:Create|Configure|Set\s*up|Setup|Steps?|Process|Procedure|Integration|Replication|Connection|Mapping|Restrictions?|Schedule|Definition|Configuration))", text
254
  )
255
  chunks = []
256
 
 
250
 
251
  # --- 🧩 Detect procedural sections (new) ---
252
  procedure_blocks = re.split(
253
+ r"(?=(?:\s*\n|\s+)\d+\.\d+\s+(?:Create|Configure|Set\s*up|Setup|Steps?|Process|Procedure|Integration|Replication|Connection|Mapping|Restrictions?|Limitations?|Prerequisites?|Considerations?|Guidelines?|Notes?|Cautions?|Recommendations?)\b)", text, flags=re.IGNORECASE
254
  )
255
  chunks = []
256