import os


def load_documents(dataset_path):
    """Load every readable file under each category directory.

    ``dataset_path`` is expected to contain one sub-directory per category;
    each file directly inside a category directory is read as one document
    and labelled with the directory name. Entries of ``dataset_path`` that
    are not directories are ignored.

    Args:
        dataset_path: Path to the dataset root directory.

    Returns:
        A ``(documents, labels)`` tuple of two equal-length lists:
        ``documents[i]`` is the text of a file and ``labels[i]`` is the name
        of the category directory it came from. Categories and files are
        visited in sorted name order so the result is reproducible.

    Raises:
        OSError: If ``dataset_path`` itself (or a category directory) cannot
            be listed, e.g. because it does not exist.
    """
    documents = []
    labels = []

    # os.listdir returns entries in arbitrary order; sort for determinism.
    for category in sorted(os.listdir(dataset_path)):
        category_path = os.path.join(dataset_path, category)
        if not os.path.isdir(category_path):
            continue

        for file_name in sorted(os.listdir(category_path)):
            file_path = os.path.join(category_path, file_name)
            try:
                # latin-1 maps every byte to a code point, so decoding
                # itself never fails; only filesystem errors are expected.
                with open(file_path, "r", encoding="latin-1") as f:
                    text = f.read()
            except OSError:
                # Best effort: skip unreadable entries (nested directories,
                # permission problems) without hiding KeyboardInterrupt or
                # programming errors the way a bare ``except`` would.
                continue

            documents.append(text)
            labels.append(category)

    return documents, labels


if __name__ == "__main__":
    dataset_path = "data/20_newsgroups"
    docs, labels = load_documents(dataset_path)
    print("Total documents:", len(docs))
    # Guard against an empty dataset, where labels[0] would raise IndexError.
    if labels:
        print("Example category:", labels[0])