File size: 1,105 Bytes
d5e772a | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 | import pandas as pd
from sklearn.datasets import fetch_openml, load_iris, fetch_california_housing
import os
def _write_csv(frame, path):
    """Save *frame* to *path* as CSV, leaving out the index column."""
    frame.to_csv(path, index=False)


# Every demo CSV is written into this folder; create it if it is missing.
os.makedirs('sample_data', exist_ok=True)

# 1. Titanic (Classification)
print("Downloading Titanic...")
titanic = fetch_openml('titanic', version=1, as_frame=True, parser='auto')
# Trim three columns so the demo table is a little simpler.
df_titanic = titanic.frame.drop(columns=['boat', 'body', 'home.dest'])
_write_csv(df_titanic, 'sample_data/titanic.csv')

# 2. House Prices / California Housing (Regression)
print("Downloading House Prices...")
california = fetch_california_housing(as_frame=True)
df_cali = california.frame
_write_csv(df_cali, 'sample_data/house_prices.csv')

# 3. Iris (Multiclass)
print("Downloading Iris...")
iris = load_iris(as_frame=True)
df_iris = iris.frame
# Replace the integer class codes with species names, then relabel the
# column to match. Both steps modify the loaded frame in place.
df_iris['target'] = df_iris['target'].map(
    {0: 'setosa', 1: 'versicolor', 2: 'virginica'}
)
df_iris.rename(columns={'target': 'species'}, inplace=True)
_write_csv(df_iris, 'sample_data/iris.csv')

print("Demo datasets saved successfully in sample_data/.")
|