Add chunk overlap and reduce the similarity search result count.
Browse files
app.py
CHANGED
|
@@ -9,10 +9,13 @@ from langchain.vectorstores import FAISS
|
|
| 9 |
|
| 10 |
|
| 11 |
# Number of search results to query from the vector database.
|
| 12 |
-
SIMILARITY_SEARCH_COUNT =
|
| 13 |
|
| 14 |
# Size of each document chunk in number of characters.
|
| 15 |
-
CHUNK_SIZE =
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
# Maximum number of output tokens.
|
| 18 |
MODEL_MAX_LENGTH = 500
|
|
@@ -23,7 +26,7 @@ loader = PyMuPDFLoader("rdna3-shader-instruction-set-architecture-feb-2023_0.pdf
|
|
| 23 |
documents = loader.load()
|
| 24 |
|
| 25 |
print("Creating chunks")
|
| 26 |
-
splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=
|
| 27 |
chunks = splitter.split_documents(documents)
|
| 28 |
|
| 29 |
print("Creating database")
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
# Number of search results to query from the vector database.
|
| 12 |
+
SIMILARITY_SEARCH_COUNT = 3
|
| 13 |
|
| 14 |
# Size of each document chunk in number of characters.
|
| 15 |
+
CHUNK_SIZE = 800
|
| 16 |
+
|
| 17 |
+
# Maximum overlap between consecutive chunks, in number of characters.
|
| 18 |
+
CHUNK_OVERLAP = 50
|
| 19 |
|
| 20 |
# Maximum number of output tokens.
|
| 21 |
MODEL_MAX_LENGTH = 500
|
|
|
|
| 26 |
documents = loader.load()
|
| 27 |
|
| 28 |
print("Creating chunks")
|
| 29 |
+
splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
|
| 30 |
chunks = splitter.split_documents(documents)
|
| 31 |
|
| 32 |
print("Creating database")
|