# Script to preprocess additional text datasets import os RAW_DATA_DIR = "data/raw" PROCESSED_DATA_DIR = "data/processed" def preprocess_files(): for filename in os.listdir(RAW_DATA_DIR): raw_file_path = os.path.join(RAW_DATA_DIR, filename) processed_file_path = os.path.join(PROCESSED_DATA_DIR, filename) with open(raw_file_path, "r", encoding="utf-8") as rf, open(processed_file_path, "w", encoding="utf-8") as pf: for line in rf: pf.write(line.strip() + "\n") print("✅ Data preprocessing complete!") if __name__ == "__main__": preprocess_files()