Spaces:

Nguyen5
/

chatbot

Sleeping

Nguyen5 committed on Dec 4, 2025

Commit

4d20c45

1 Parent(s): 9fd5591

commit

Files changed (1) hide show

load_documents.py CHANGED Viewed

@@ -128,33 +128,3 @@ if __name__ == "__main__":
     if len(docs):
         print("\nExample metadata from 1st document:")
         print(docs[0].metadata)
-- split_documents.py:
-# split_documents.py – v2
-from langchain_text_splitters import RecursiveCharacterTextSplitter
-CHUNK_SIZE = 1500
-CHUNK_OVERLAP = 200
-def split_documents(docs):
-    splitter = RecursiveCharacterTextSplitter(
-        chunk_size=CHUNK_SIZE,
-        chunk_overlap=CHUNK_OVERLAP,
-        separators=["\n\n", "\n", ". ", " ", ""],
-    )
-    chunks = splitter.split_documents(docs)
-    for c in chunks:
-        c.metadata["chunk_size"] = CHUNK_SIZE
-        c.metadata["chunk_overlap"] = CHUNK_OVERLAP
-    return chunks
-if __name__ == "__main__":
-    from load_documents import load_documents
-    docs = load_documents()
-    chunks = split_documents(docs)
-    print("Docs:", len(docs), "Chunks:", len(chunks))
-    print(chunks[0].page_content[:300], chunks[0].metadata)

     if len(docs):
         print("\nExample metadata from 1st document:")
         print(docs[0].metadata)