"""Clean the raw complaints dataset and save it as a CSV file.

Running this file as a script reads the raw comma-separated text file,
normalises whitespace and letter case in every column, removes incomplete
and duplicate rows, prints a short summary and writes the cleaned CSV.
Importing the module only defines the paths and helpers; no file is read
or written at import time.
"""
import os

import pandas as pd

# Define file paths
raw_file = os.path.join('data', 'complaints_dataset.txt')
clean_csv = os.path.join('data', 'complaints_dataset.csv')

# Column names assigned to the raw file when it is loaded in main().
COLUMN_NAMES = ['complaint_text', 'category', 'sentiment', 'urgency']


def clean_complaints(df):
    """Return a cleaned copy of a complaints DataFrame.

    Args:
        df: DataFrame containing the columns listed in ``COLUMN_NAMES``.

    Returns:
        A new DataFrame in which rows with missing values have been
        dropped, every text field is stripped of surrounding whitespace,
        ``category`` is title-cased, ``sentiment`` and ``urgency`` are
        capitalised, rows with an empty field are removed and exact
        duplicate rows are collapsed. The input frame is not modified.

    Raises:
        KeyError: If one of the expected columns is absent.
    """
    # Drop missing values BEFORE converting to str: astype(str) can turn
    # NaN into the literal text 'nan', after which dropna() sees nothing
    # left to remove.
    cleaned = df.dropna().copy()

    # Strip first, then change case: capitalize() only upper-cases the very
    # first character, so a leading space would leave the word lower-case.
    cleaned['complaint_text'] = cleaned['complaint_text'].astype(str).str.strip()
    cleaned['category'] = cleaned['category'].astype(str).str.strip().str.title()
    cleaned['sentiment'] = (
        cleaned['sentiment'].astype(str).str.strip().str.capitalize()
    )
    cleaned['urgency'] = (
        cleaned['urgency'].astype(str).str.strip().str.capitalize()
    )

    # Whitespace-only fields are not NaN when loaded, but they are blank
    # once stripped; discard any row that contains one.
    has_blank_field = cleaned[COLUMN_NAMES].eq('').any(axis=1)
    cleaned = cleaned[~has_blank_field]

    # De-duplicate last so rows that only differed by case or padding are
    # recognised as duplicates of each other.
    return cleaned.drop_duplicates()


def main(source=raw_file, destination=clean_csv):
    """Load the raw file, clean it, print a summary and save the CSV.

    Args:
        source: Path of the raw comma-separated text file.
        destination: Path the cleaned CSV is written to.

    Returns:
        The cleaned DataFrame.
    """
    # Load text data
    df = pd.read_csv(source, names=COLUMN_NAMES)

    # Clean data, remove blanks & duplicates
    df = clean_complaints(df)

    # Print summary
    print("āœ… Dataset cleaned successfully!")
    print("Total Rows:", df.shape[0])
    print("Categories:", df['category'].unique())
    print("Sentiments:", df['sentiment'].unique())
    print("Urgency Levels:", df['urgency'].unique())

    # Save clean CSV
    df.to_csv(destination, index=False, encoding='utf-8')
    print(f"\nšŸ’¾ Clean CSV saved to: {destination}")
    return df


if __name__ == "__main__":
    # Keep `df` available at module level when executed as a script.
    df = main()