Nguyen5 committed on
Commit
4d20c45
·
1 Parent(s): 9fd5591
Files changed (1) hide show
  1. load_documents.py +0 -30
load_documents.py CHANGED
@@ -128,33 +128,3 @@ if __name__ == "__main__":
128
  if len(docs):
129
  print("\nExample metadata from 1st document:")
130
  print(docs[0].metadata)
131
-
132
-
133
- - split_documents.py:
134
- # split_documents.py – v2
135
-
136
- from langchain_text_splitters import RecursiveCharacterTextSplitter
137
-
138
- CHUNK_SIZE = 1500
139
- CHUNK_OVERLAP = 200
140
-
141
- def split_documents(docs):
142
- splitter = RecursiveCharacterTextSplitter(
143
- chunk_size=CHUNK_SIZE,
144
- chunk_overlap=CHUNK_OVERLAP,
145
- separators=["\n\n", "\n", ". ", " ", ""],
146
- )
147
- chunks = splitter.split_documents(docs)
148
-
149
- for c in chunks:
150
- c.metadata["chunk_size"] = CHUNK_SIZE
151
- c.metadata["chunk_overlap"] = CHUNK_OVERLAP
152
-
153
- return chunks
154
-
155
- if __name__ == "__main__":
156
- from load_documents import load_documents
157
- docs = load_documents()
158
- chunks = split_documents(docs)
159
- print("Docs:", len(docs), "Chunks:", len(chunks))
160
- print(chunks[0].page_content[:300], chunks[0].metadata)
 
128
  if len(docs):
129
  print("\nExample metadata from 1st document:")
130
  print(docs[0].metadata)