# Spaces: Sleeping
# File size: 549 Bytes
# Revision: 45fe8b6
import os
import pickle
def load_documents(root_dir):
    """Read every file found under *root_dir* into memory as text.

    The directory tree is traversed with ``os.walk`` and each file is decoded
    as latin1. latin1 maps every possible byte to a character, so decoding
    itself cannot fail; the only expected failures are I/O errors (permission
    denied, dangling symlink, file removed mid-walk, ...).

    Args:
        root_dir: Path of the directory tree to read.

    Returns:
        A ``(texts, skipped)`` tuple: ``texts`` is the list of file contents
        in ``os.walk`` order and ``skipped`` is the list of paths that could
        not be read.
    """
    texts = []
    skipped = []
    for folder, _subdirs, filenames in os.walk(root_dir):
        for filename in filenames:
            file_path = os.path.join(folder, filename)
            # Keep the try body minimal and catch only I/O failures so that
            # KeyboardInterrupt / SystemExit and genuine bugs still propagate.
            try:
                with open(file_path, "r", encoding="latin1") as f:
                    text = f.read()
            except OSError:
                # Best-effort load: remember the file and carry on.
                skipped.append(file_path)
                continue
            texts.append(text)
    return texts, skipped


data_path = "data/20_newsgroups"

# os.walk() ignores directory-listing errors by default, so a missing data
# directory would otherwise produce an empty pickle without any hint.
if not os.path.isdir(data_path):
    print("Warning: data directory not found:", data_path)

documents, skipped_files = load_documents(data_path)
if skipped_files:
    print("Skipped unreadable files:", len(skipped_files))
print("Total documents loaded:", len(documents))

# Persist the raw documents for later steps that read models/documents.pkl.
os.makedirs("models", exist_ok=True)
with open("models/documents.pkl", "wb") as f:
    pickle.dump(documents, f)
print("documents.pkl saved successfully!")