Update src/ingestion.py
Browse files- src/ingestion.py +1 -1
src/ingestion.py
CHANGED
|
@@ -35,7 +35,7 @@ def extract_text_from_pdf(file_path: str) -> str:
|
|
| 35 |
# -----------------------------
|
| 36 |
# SMART CHUNKING (Step-Aware + Context-Aware)
|
| 37 |
# -----------------------------
|
| 38 |
-
def chunk_text(text: str, chunk_size: int =
|
| 39 |
"""
|
| 40 |
Splits text into overlapping, structured chunks.
|
| 41 |
Detects procedural steps (e.g., 'Step 1:', 'STEP 2.') and keeps them intact.
|
|
|
|
| 35 |
# -----------------------------
|
| 36 |
# SMART CHUNKING (Step-Aware + Context-Aware)
|
| 37 |
# -----------------------------
|
| 38 |
+
def chunk_text(text: str, chunk_size: int = 1000, overlap: int = 80) -> list:
|
| 39 |
"""
|
| 40 |
Splits text into overlapping, structured chunks.
|
| 41 |
Detects procedural steps (e.g., 'Step 1:', 'STEP 2.') and keeps them intact.
|