Spaces:
Sleeping
Sleeping
"""Build ``models/documents.pkl`` from the raw text files under ``data_path``.

Every file found below ``data_path`` is read as text and appended to a list;
the list is then pickled to ``models/documents.pkl``. The work runs at module
level, so both executing and importing this file trigger the build.
"""

import os
import pickle
import sys

data_path = "data/20_newsgroups"


def load_documents(root_dir):
    """Read every file below *root_dir* and return ``(texts, skipped)``.

    ``texts`` holds the file contents in the order ``os.walk`` yields them
    (the order is whatever the filesystem reports; it is not sorted).
    ``skipped`` holds the paths of files that could not be opened or read.
    Directory-listing failures, including a missing *root_dir*, are reported
    on stderr instead of being ignored.
    """

    def report_walk_error(exc):
        # os.walk drops listing errors silently unless given an onerror hook.
        print(f"Warning: cannot list directory: {exc}", file=sys.stderr)

    texts = []
    skipped = []
    for root, _dirs, files in os.walk(root_dir, onerror=report_walk_error):
        for name in files:
            file_path = os.path.join(root, name)
            try:
                # latin1 maps every byte value to a character, so decoding
                # cannot fail; only I/O errors are possible here.
                with open(file_path, "r", encoding="latin1") as f:
                    texts.append(f.read())
            except OSError as exc:
                # Best-effort: keep going, but leave a trace of what was lost.
                skipped.append(file_path)
                print(
                    f"Warning: skipping unreadable file {file_path}: {exc}",
                    file=sys.stderr,
                )
    return texts, skipped


def save_documents(docs, output_path):
    """Pickle *docs* to *output_path*, creating the parent directory if needed."""
    parent = os.path.dirname(output_path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(output_path, "wb") as f:
        pickle.dump(docs, f)


documents, skipped_files = load_documents(data_path)
print("Total documents loaded:", len(documents))
if skipped_files:
    print(
        f"Warning: {len(skipped_files)} file(s) could not be read",
        file=sys.stderr,
    )

save_documents(documents, "models/documents.pkl")
print("documents.pkl saved successfully!")