# sentimnet-analysis / makecsv.py
# deedrop1140's picture
# Upload 34 files
# 8826cea verified
import pandas as pd
import os
# Define file paths
raw_file = os.path.join('data', 'complaints_dataset.txt')
clean_csv = os.path.join('data', 'complaints_dataset.csv')

# Column names assigned, in order, to the fields of the header-less raw file.
COLUMN_NAMES = ['complaint_text', 'category', 'sentiment', 'urgency']


def clean_complaints(frame):
    """Return a cleaned copy of a raw complaints DataFrame.

    The input must contain the columns listed in ``COLUMN_NAMES``. The input
    frame itself is not modified.

    Cleaning steps, in order:
      1. Drop rows with a missing value in any column.
      2. Strip surrounding whitespace from every column, then title-case
         ``category`` and capitalize ``sentiment`` and ``urgency``.
      3. Drop rows in which any column is empty after stripping.
      4. Drop exact duplicate rows (the first occurrence is kept).

    Returns:
        A new DataFrame with the same columns; the original index labels of
        the surviving rows are preserved.
    """
    # Missing values must be removed BEFORE the cast to str: astype(str)
    # turns NaN into the literal text "nan", which dropna() no longer sees.
    cleaned = frame.dropna().copy()

    # Strip first, then change case: str.capitalize() only upper-cases the
    # very first character, so " negative" would otherwise stay lower-case.
    cleaned['complaint_text'] = cleaned['complaint_text'].astype(str).str.strip()
    cleaned['category'] = cleaned['category'].astype(str).str.strip().str.title()
    cleaned['sentiment'] = cleaned['sentiment'].astype(str).str.strip().str.capitalize()
    cleaned['urgency'] = cleaned['urgency'].astype(str).str.strip().str.capitalize()

    # A whitespace-only field is an empty string by now; treat it as blank.
    has_blank = (cleaned[COLUMN_NAMES] == '').any(axis=1)
    cleaned = cleaned[~has_blank]

    return cleaned.drop_duplicates()


if __name__ == "__main__":
    # Load text data: comma-separated, no header row (names are supplied).
    df = pd.read_csv(raw_file, header=None, names=COLUMN_NAMES)

    # Clean data, remove blanks & duplicates
    df = clean_complaints(df)

    # Print summary
    print("✅ Dataset cleaned successfully!")
    print("Total Rows:", df.shape[0])
    print("Categories:", df['category'].unique())
    print("Sentiments:", df['sentiment'].unique())
    print("Urgency Levels:", df['urgency'].unique())

    # Save clean CSV
    df.to_csv(clean_csv, index=False, encoding='utf-8')
    print(f"\n💾 Clean CSV saved to: {clean_csv}")