Create utils.py
Browse files
utils.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def chunk_text(text, chunk_size=400, overlap=80):
    """Split *text* into fixed-size chunks that overlap their neighbours.

    Consecutive chunks start ``chunk_size - overlap`` positions apart, so
    each chunk repeats the last ``overlap`` items of the previous one. The
    final chunk(s) may be shorter than ``chunk_size`` when the text runs out.

    Args:
        text: The sequence to split (typically a ``str``); it must support
            ``len()`` and slicing.
        chunk_size: Maximum length of each chunk. Must be positive.
        overlap: Number of items shared between consecutive chunks. Must
            satisfy ``0 <= overlap < chunk_size``.

    Returns:
        A list of slices of ``text`` in order; empty if ``text`` is empty.

    Raises:
        ValueError: If ``chunk_size`` is not positive, or ``overlap`` is
            negative or not smaller than ``chunk_size``.

    Example:
        >>> chunk_text("abcdefghij", chunk_size=4, overlap=2)
        ['abcd', 'cdef', 'efgh', 'ghij', 'ij']
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    if overlap < 0 or overlap >= chunk_size:
        # A step of zero or less would never advance the window (the
        # original loop spun forever here); a negative overlap would
        # silently skip text between chunks.
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < chunk_size, "
            f"got overlap={overlap}, chunk_size={chunk_size}"
        )

    step = chunk_size - overlap
    # Slicing clamps at the end of the sequence, so no explicit min() is
    # needed for the final, possibly shorter, chunk.
    return [
        text[start:start + chunk_size]
        for start in range(0, len(text), step)
    ]
|