File size: 639 Bytes
f29d474 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
# Script to preprocess additional text datasets
import os
# Directory scanned for input files; a relative path, so it is resolved
# against the current working directory at run time.
RAW_DATA_DIR = "data/raw"
# Directory that receives the cleaned copy of each input file, written
# under the same filename as the input.
PROCESSED_DATA_DIR = "data/processed"
def preprocess_files(raw_dir=None, processed_dir=None):
    """Write a whitespace-trimmed copy of every file in the raw data directory.

    Each regular file in ``raw_dir`` is read as UTF-8 text; every line has its
    leading and trailing whitespace removed and is written, followed by a
    single newline, to a file with the same name inside ``processed_dir``.
    Blank lines are kept (as empty lines), and an existing output file with
    the same name is overwritten.

    Args:
        raw_dir: Directory to read from. Defaults to ``RAW_DATA_DIR``.
        processed_dir: Directory to write to. Defaults to
            ``PROCESSED_DATA_DIR``. It is created if it does not exist.

    Raises:
        FileNotFoundError: If ``raw_dir`` does not exist.
        UnicodeDecodeError: If an input file is not valid UTF-8.
    """
    # Resolve defaults at call time so the module-level constants stay the
    # single source of truth for the default locations.
    if raw_dir is None:
        raw_dir = RAW_DATA_DIR
    if processed_dir is None:
        processed_dir = PROCESSED_DATA_DIR

    # open(..., "w") does not create missing parent directories, so make sure
    # the output directory exists before writing the first file.
    os.makedirs(processed_dir, exist_ok=True)

    for filename in os.listdir(raw_dir):
        raw_file_path = os.path.join(raw_dir, filename)
        # os.listdir also returns sub-directories; opening one as a file
        # would raise, so only regular files are processed.
        if not os.path.isfile(raw_file_path):
            continue

        processed_file_path = os.path.join(processed_dir, filename)
        with open(raw_file_path, "r", encoding="utf-8") as rf, \
                open(processed_file_path, "w", encoding="utf-8") as pf:
            # Stream line by line so large files are never held in memory.
            pf.writelines(line.strip() + "\n" for line in rf)

    print("✅ Data preprocessing complete!")
# Only run the preprocessing pass when this file is executed as a script,
# so importing the module has no side effects.
if __name__ == "__main__":
    preprocess_files()
|