import re
def preprocess_text(text: str) -> str:
    """Replace non-word characters with spaces and normalize whitespace.

    Every run of non-word characters (punctuation, symbols, and whitespace)
    is collapsed into a single space, and leading/trailing spaces are
    removed.

    Digits and underscores count as word characters for the regex engine,
    so they are kept unchanged.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned string; empty if ``text`` contains no word characters.
    """
    # Whitespace is itself non-word, so a single `\W+` pass both replaces
    # special characters and collapses the resulting runs of spaces.
    collapsed = re.sub(r'\W+', ' ', text)
    return collapsed.strip()
# Read the whole input first, so the output file is only created once the
# input has been read successfully.
# The encoding is explicit so results do not depend on the platform default.
# NOTE(review): assumes data.csv is UTF-8 encoded -- confirm.
with open("data.csv", "r", encoding="utf-8") as source:
    data = source.readlines()

# Clean each input line independently; a line without any word characters
# becomes an empty entry.
cleaned_data = [preprocess_text(line) for line in data]

# Save the cleaned data, one entry per output line.
with open("cleaned_data.txt", "w", encoding="utf-8") as sink:
    sink.writelines(entry + "\n" for entry in cleaned_data)