Shubham170793 committed on
Commit
eaada01
·
verified ·
1 Parent(s): f2fb7ac

Update src/ingestion.py

Browse files
Files changed (1) hide show
  1. src/ingestion.py +1 -1
src/ingestion.py CHANGED
@@ -98,7 +98,7 @@ def chunk_text(text: str, chunk_size: int = 1000, overlap: int = 80) -> list:
98
 
99
  # --- Step 1️⃣: Split into logical sections by headings or step titles ---
100
  # Detect section headers like "3.1.2 Prerequisites for Commerce Automation", "Step 2:", etc.
101
- section_pattern = r"(?=(?:\n?\d+(\.\d+){0,3}\s+[A-Z][^\n]{3,100})|(?:Step\s*\d+[:.\s]))"
102
  sections = re.split(section_pattern, text)
103
  sections = [s.strip() for s in sections if s.strip()]
104
 
 
98
 
99
  # --- Step 1️⃣: Split into logical sections by headings or step titles ---
100
  # Detect section headers like "3.1.2 Prerequisites for Commerce Automation", "Step 2:", etc.
101
+ section_pattern = r"(?=(?:\n?\d+(?:\.\d+){0,3}\s+[A-Z][^\n]{3,100})|(?:Step\s*\d+[:.\s]))"
102
  sections = re.split(section_pattern, text)
103
  sections = [s.strip() for s in sections if s.strip()]
104