Spaces:
Runtime error
Runtime error
| import pandas as pd | |
| from datasets import load_dataset | |
| import os | |
| def prepare_data(): | |
| dataset = load_dataset("Shramik121/tourism-split-dataset") | |
| data = pd.DataFrame(dataset['train']) | |
| if 'Unnamed: 0' in data.columns: | |
| data = data.drop('Unnamed: 0', axis=1) | |
| num_cols = ['Age', 'DurationOfPitch', 'NumberOfPersonVisiting', 'NumberOfFollowups', | |
| 'PreferredPropertyStar', 'NumberOfTrips', 'PitchSatisfactionScore', | |
| 'NumberOfChildrenVisiting', 'MonthlyIncome'] | |
| cat_cols = ['TypeofContact', 'Occupation', 'Gender', 'ProductPitched', | |
| 'MaritalStatus', 'Designation', 'CityTier'] | |
| data[num_cols] = data[num_cols].fillna(data[num_cols].median()) | |
| data[cat_cols] = data[cat_cols].fillna('Unknown') | |
| if 'CustomerID' in data: | |
| data = data.drop('CustomerID', axis=1) | |
| if 'Gender' in data: | |
| data['Gender'] = data['Gender'].replace('Fe Male', 'Female') | |
| output_dir = os.getenv('OUTPUT_DIR', 'data') | |
| os.makedirs(output_dir, exist_ok=True) | |
| data.to_csv(os.path.join(output_dir, 'processed.csv'), index=False) | |
| test_data = pd.DataFrame(dataset['test']) | |
| if 'Unnamed: 0' in test_data.columns: | |
| test_data = test_data.drop('Unnamed: 0', axis=1) | |
| test_data[num_cols] = test_data[num_cols].fillna(data[num_cols].median()) | |
| test_data[cat_cols] = test_data[cat_cols].fillna('Unknown') | |
| if 'CustomerID' in test_data: | |
| test_data = test_data.drop('CustomerID', axis=1) | |
| if 'Gender' in test_data: | |
| test_data['Gender'] = test_data['Gender'].replace('Fe Male', 'Female') | |
| test_data.to_csv(os.path.join(output_dir, 'test.csv'), index=False) | |
| print("Data prepared and saved to", output_dir) | |
| if __name__ == "__main__": | |
| prepare_data() | |