"""Collect every text file under ``data/20_newsgroups`` into one pickle.

Each file is read in full as a single string; the resulting list of strings
is written to ``models/documents.pkl``.
"""

import os
import pickle
import sys
from typing import List

DATA_PATH = "data/20_newsgroups"
OUTPUT_PATH = "models/documents.pkl"


def load_documents(data_path: str) -> List[str]:
    """Return the contents of every file found beneath *data_path*.

    The tree is traversed in sorted order so the position of each document
    in the returned list is reproducible across runs and machines.

    Args:
        data_path: Root directory to walk recursively.

    Returns:
        One string per readable file. Files that cannot be opened or read
        are skipped with a warning on stderr. A missing *data_path* yields
        an empty list.
    """
    documents: List[str] = []
    for root, dirs, files in os.walk(data_path):
        # Sorting ``dirs`` in place steers os.walk's top-down traversal;
        # without it the visiting order is whatever the filesystem returns.
        dirs.sort()
        for file_name in sorted(files):
            file_path = os.path.join(root, file_name)
            try:
                # latin1 maps every byte to a code point, so decoding never
                # fails; only I/O errors remain to be handled.
                with open(file_path, "r", encoding="latin1") as f:
                    documents.append(f.read())
            except OSError as err:
                # Best effort: report the unreadable file and keep going.
                print(f"Skipping {file_path}: {err}", file=sys.stderr)
    return documents


def save_documents(documents: List[str], output_path: str) -> None:
    """Pickle *documents* to *output_path*, creating its parent directory.

    Args:
        documents: The list of document strings to serialize.
        output_path: Destination file for the pickle.

    Raises:
        OSError: If the directory or the file cannot be created or written.
    """
    parent = os.path.dirname(output_path)
    if parent:  # os.makedirs("") raises, so skip it for bare file names
        os.makedirs(parent, exist_ok=True)
    # NOTE: pickle files must only ever be loaded from trusted sources.
    with open(output_path, "wb") as f:
        pickle.dump(documents, f)


def main() -> None:
    """Load all documents from DATA_PATH and save them to OUTPUT_PATH."""
    if not os.path.isdir(DATA_PATH):
        # os.walk silently yields nothing for a missing directory.
        print(f"Warning: {DATA_PATH} is not a directory", file=sys.stderr)

    documents = load_documents(DATA_PATH)
    print("Total documents loaded:", len(documents))

    save_documents(documents, OUTPUT_PATH)
    print("documents.pkl saved successfully!")


if __name__ == "__main__":
    main()