Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -36,15 +36,15 @@ def get_pdf_text(pdf_docs):
|
|
| 36 |
|
| 37 |
# Chunk size and overlap must not exceed the model's capacity!
|
| 38 |
#
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
|
| 49 |
|
| 50 |
# def get_vectorstore(text_chunks):
|
|
|
|
| 36 |
|
| 37 |
# Chunk size and overlap must not exceed the model's capacity!
|
| 38 |
#
|
| 39 |
+
def get_text_chunks(text, chunk_size=800, chunk_overlap=200):
    """Split ``text`` into chunks using a newline-separated splitter.

    Args:
        text: The text to split.
        chunk_size: Value forwarded to ``CharacterTextSplitter`` as
            ``chunk_size``; lengths are measured with ``len``. Defaults to
            800, the value that was previously hard-coded (an earlier
            revision used 1000).
        chunk_overlap: Value forwarded to ``CharacterTextSplitter`` as
            ``chunk_overlap``. Defaults to 200, the previously hard-coded
            value.

    Returns:
        Whatever ``CharacterTextSplitter.split_text`` returns for ``text``
        (presumably a list of string chunks -- confirm against the
        installed splitter version).
    """
    # Chunk size and overlap must stay within the capacity of the model
    # that consumes the chunks, so tune the defaults with that in mind.
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    return text_splitter.split_text(text)
|
| 48 |
|
| 49 |
|
| 50 |
# def get_vectorstore(text_chunks):
|