# Spaces: Sleeping
import os

import pandas as pd

# Define file paths
raw_file = os.path.join('data', 'complaints_dataset.txt')
clean_csv = os.path.join('data', 'complaints_dataset.csv')

# Column names assigned to the raw file, which is read without a header row.
COLUMNS = ['complaint_text', 'category', 'sentiment', 'urgency']

# Letter-casing applied to each column after whitespace is stripped.
_COLUMN_CASE = {
    'complaint_text': None,
    'category': 'title',
    'sentiment': 'capitalize',
    'urgency': 'capitalize',
}


def _normalise_text(series, case=None):
    """Return *series* as stripped text with missing/blank entries kept as NaN.

    Args:
        series: Column to normalise; values of any dtype are converted to text.
        case: ``'title'``, ``'capitalize'`` or ``None`` (leave casing alone).

    Returns:
        A new Series of the same length and index.
    """
    # Remember which entries were missing *before* the string conversion:
    # astype(str) may turn NaN/None into the literal text "nan"/"None",
    # which dropna() would then no longer recognise as missing.
    missing = series.isna()
    # Strip first so leading whitespace cannot defeat capitalize().
    text = series.astype(str).str.strip()
    if case == 'title':
        text = text.str.title()
    elif case == 'capitalize':
        text = text.str.capitalize()
    # Whitespace-only fields are empty after stripping; treat them as blank.
    return text.mask(missing | (text == ''))


def clean_complaints(df):
    """Normalise the complaint columns and drop blank and duplicate rows.

    Each column listed in ``COLUMNS`` is stripped of surrounding whitespace;
    ``category`` is title-cased, ``sentiment`` and ``urgency`` are capitalised.
    Rows with any missing or blank value are removed, then exact duplicate
    rows are removed.

    Args:
        df: DataFrame containing the columns listed in ``COLUMNS``.

    Returns:
        A new, cleaned DataFrame; the input is not modified.
    """
    cleaned = df.copy()
    for column, case in _COLUMN_CASE.items():
        cleaned[column] = _normalise_text(cleaned[column], case)
    # Remove blanks & duplicates
    return cleaned.dropna().drop_duplicates()


def main():
    """Load the raw complaints file, clean it, print a summary, save a CSV."""
    # Load text data
    df = pd.read_csv(raw_file, names=COLUMNS)
    df = clean_complaints(df)

    # Print summary
    print("✅ Dataset cleaned successfully!")
    print("Total Rows:", df.shape[0])
    print("Categories:", df['category'].unique())
    print("Sentiments:", df['sentiment'].unique())
    print("Urgency Levels:", df['urgency'].unique())

    # Save clean CSV
    df.to_csv(clean_csv, index=False, encoding='utf-8')
    print(f"\n💾 Clean CSV saved to: {clean_csv}")


if __name__ == "__main__":
    main()