Spaces:

corbanyax
/

tabBench

Running

atharvavdeo

Fix TabFM handling for uploaded datasets

68f889f about 4 hours ago

70.9 kB

	from __future__ import annotations

	import importlib
	import math
	import traceback
	import time
	from dataclasses import dataclass
	from functools import lru_cache
	from pathlib import Path
	from typing import Callable

	import gradio as gr
	import numpy as np
	import pandas as pd
	import plotly.express as px
	import plotly.graph_objects as go
	from plotly.subplots import make_subplots
	from sklearn.base import clone
	from sklearn.compose import ColumnTransformer
	from sklearn.datasets import fetch_california_housing, fetch_openml, load_breast_cancer, load_diabetes, load_digits, load_iris, load_wine, make_classification
	from sklearn.dummy import DummyClassifier, DummyRegressor
	from sklearn.ensemble import (
	AdaBoostClassifier,
	AdaBoostRegressor,
	ExtraTreesClassifier,
	ExtraTreesRegressor,
	GradientBoostingClassifier,
	GradientBoostingRegressor,
	HistGradientBoostingClassifier,
	HistGradientBoostingRegressor,
	RandomForestClassifier,
	RandomForestRegressor,
	)
	from sklearn.impute import SimpleImputer
	from sklearn.linear_model import BayesianRidge, LinearRegression, LogisticRegression, Ridge
	from sklearn.metrics import (
	accuracy_score,
	f1_score,
	mean_absolute_error,
	mean_squared_error,
	r2_score,
	roc_auc_score,
	)
	from sklearn.model_selection import train_test_split
	from sklearn.naive_bayes import GaussianNB
	from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
	from sklearn.pipeline import Pipeline
	from sklearn.preprocessing import OneHotEncoder, StandardScaler
	from sklearn.svm import SVC, SVR


	# Application title string.
	APP_TITLE = "tabBench"
	# Identifier of the TabFM model (presumably a model-hub repo id — confirm against the loader).
	TABFM_MODEL_ID = "google/tabfm-1.0.0-pytorch"
	# Shared seed passed as random_state to the baseline estimators.
	RANDOM_STATE = 42
	# Raw CSV location read by fivethirtyeight_candy().
	CANDY_DATA_URL = "https://raw.githubusercontent.com/fivethirtyeight/data/master/candy-power-ranking/candy-data.csv"
	# Hex colour palette (presumably used for the Plotly charts — usage is outside this view).
	GOOGLE_COLORS = ["#4285F4", "#DB4437", "#F4B400", "#0F9D58", "#A142F4", "#00ACC1"]
	# Metric names offered by the app; the first three look like classification metrics,
	# the next three regression metrics, plus wall-clock "seconds" — TODO confirm with the UI code.
	METRIC_CHOICES = ["accuracy", "f1_weighted", "roc_auc", "rmse", "mae", "r2", "seconds"]
	# Named TabFM configuration presets. Every preset carries the same set of keys;
	# how each key is consumed is defined by the TabFM wrapper outside this view.
	TABFM_PRESETS = {
	"Fast": {"n_estimators": 1, "max_num_rows": 256, "max_num_features": 64, "batch_size": 1, "enable_nnls": False, "n_feature_crosses": 0, "n_svd_features": 0, "max_eval_rows": 256},
	"Balanced": {"n_estimators": 4, "max_num_rows": 512, "max_num_features": 128, "batch_size": 1, "enable_nnls": False, "n_feature_crosses": 0, "n_svd_features": 0, "max_eval_rows": 512},
	"Default": {"n_estimators": 32, "max_num_rows": None, "max_num_features": 500, "batch_size": 1, "enable_nnls": False, "n_feature_crosses": 0, "n_svd_features": 0, "max_eval_rows": 1000},
	"Ensemble": {"n_estimators": 32, "max_num_rows": None, "max_num_features": 500, "batch_size": 1, "enable_nnls": True, "n_feature_crosses": "sqrt", "n_svd_features": "sqrt", "max_eval_rows": 1000},
	}


	@dataclass(frozen=True)
	class DatasetSpec:
	    """Immutable catalogue entry describing one benchmark dataset.

	    Instances are built positionally, so the field order below is part of
	    the interface.
	    """

	    # Display name; also the key matched by name-based lookups.
	    name: str
	    # Task label, e.g. "classification" or "regression".
	    task: str
	    # Name of the target column in the frame returned by ``loader``.
	    target: str
	    # Human-readable provenance string.
	    source: str
	    # Nominal row count of the dataset.
	    rows: int
	    # Short description of the prediction problem.
	    description: str
	    # Callable invoked as ``loader(limit, seed)`` that returns the data frame.
	    loader: Callable[[int, int], pd.DataFrame]


	def _add_categorical_noise(df: pd.DataFrame, rng: np.random.Generator, prefix: str) -> pd.DataFrame:
	df = df.copy()
	df[f"{prefix}_segment"] = rng.choice(["A", "B", "C", "D"], len(df), p=[0.35, 0.25, 0.25, 0.15])
	df[f"{prefix}_region"] = rng.choice(["north", "south", "east", "west"], len(df))
	return df


	def sample_df(df: pd.DataFrame, limit: int, seed: int) -> pd.DataFrame:
	    """Return at most *limit* randomly sampled rows of *df* with a fresh 0..n-1 index."""
	    row_count = min(limit, len(df))
	    picked = df.sample(row_count, random_state=seed)
	    return picked.reset_index(drop=True)


	def find_first_data_file(root: str | Path, suffixes: tuple[str, ...]) -> Path:
	    """Return the first file under *root* whose name ends with one of *suffixes*.

	    Suffixes are tried in the order given; within one suffix the
	    lexicographically smallest path wins. The search is recursive.

	    Raises:
	        FileNotFoundError: if no file matches any suffix.
	    """
	    base = Path(root)
	    for suffix in suffixes:
	        # Skip directories that merely happen to end with the suffix; only
	        # regular files can be read as data.
	        matches = sorted(path for path in base.rglob(f"*{suffix}") if path.is_file())
	        if matches:
	            return matches[0]
	    raise FileNotFoundError(f"No data file with suffixes {suffixes} found in {base}")


	def normalize_columns(df: pd.DataFrame) -> pd.DataFrame:
	    """Return a copy of *df* whose column labels are snake_case-like strings.

	    Labels are stringified, trimmed, lower-cased, every run of characters
	    outside ``[0-9a-z]`` collapsed to a single underscore, and leading or
	    trailing underscores removed.
	    """
	    labels = pd.Index(df.columns).astype(str)
	    labels = labels.str.strip().str.lower()
	    labels = labels.str.replace(r"[^0-9a-z]+", "_", regex=True)
	    labels = labels.str.strip("_")

	    renamed = df.copy()
	    renamed.columns = labels
	    return renamed


	def choose_col(df: pd.DataFrame, candidates: list[str], contains: list[str] | None = None) -> str:
	    """Pick a column of *df* by exact name, falling back to a substring match.

	    Args:
	        df: Frame whose columns are searched.
	        candidates: Names tried in order, compared case-insensitively.
	        contains: Optional substrings; if no candidate matches, the first
	            column whose lower-cased label contains all of them is returned.

	    Returns:
	        The matching column label exactly as it appears in *df*.

	    Raises:
	        KeyError: if neither pass finds a column.
	    """
	    # str() keeps non-string labels (ints, tuples) from crashing the lookup
	    # and matches how the substring pass below treats labels.
	    by_lower = {str(col).lower(): col for col in df.columns}
	    for candidate in candidates:
	        key = candidate.lower()
	        if key in by_lower:
	            return by_lower[key]
	    if contains:
	        for col in df.columns:
	            label = str(col).lower()
	            if all(part in label for part in contains):
	                return col
	    raise KeyError(f"None of {candidates} found in columns.")


	def kaggle_csv(dataset_id: str, preferred_names: tuple[str, ...] = ()) -> pd.DataFrame:
	    """Download a Kaggle dataset via kagglehub and load one of its CSV files.

	    The first CSV whose file name contains one of *preferred_names*
	    (case-insensitive, names tried in order) is loaded; otherwise the first
	    CSV in sorted path order is used.

	    Raises:
	        FileNotFoundError: if the download contains no CSV file.
	    """
	    import kagglehub

	    download_dir = Path(kagglehub.dataset_download(dataset_id))
	    csv_files = sorted(download_dir.rglob("*.csv"))

	    for wanted in preferred_names:
	        needle = wanted.lower()
	        hit = next((f for f in csv_files if needle in f.name.lower()), None)
	        if hit is not None:
	            return pd.read_csv(hit)

	    if not csv_files:
	        raise FileNotFoundError(f"No CSV files found in Kaggle dataset {dataset_id}.")
	    return pd.read_csv(csv_files[0])


	def select_numeric_features(df: pd.DataFrame, target: str, max_features: int = 12) -> pd.DataFrame:
	    """Keep up to *max_features* numeric feature columns plus *target*.

	    Rows whose target value is missing are dropped. The target column is
	    placed last.
	    """
	    numeric_names = df.select_dtypes(include=np.number).columns
	    feature_names = [name for name in numeric_names if name != target]
	    kept = feature_names[:max_features]
	    return df[kept + [target]].dropna(subset=[target])


	@lru_cache(maxsize=1)
	def openml_titanic() -> pd.DataFrame:
	    """Fetch the Titanic frame from OpenML (data_id=40945), cached after the first call.

	    Only the listed passenger columns that exist are kept, and rows
	    without a ``survived`` value are dropped.
	    """
	    wanted = ("pclass", "sex", "age", "sibsp", "parch", "fare", "embarked", "survived")
	    frame = fetch_openml(data_id=40945, as_frame=True, parser="auto").frame.copy()
	    present = [name for name in wanted if name in frame.columns]
	    return frame[present].dropna(subset=["survived"])


	@lru_cache(maxsize=1)
	def openml_ames_housing() -> pd.DataFrame:
	    """Fetch the Ames housing frame from OpenML (data_id=42165), cached after the first call.

	    A fixed shortlist of feature columns is kept where available, the
	    target is renamed to ``sale_price`` and rows without it are dropped.
	    """
	    bunch = fetch_openml(data_id=42165, as_frame=True, parser="auto")
	    frame = bunch.frame.copy()
	    if "SalePrice" in frame.columns:
	        target = "SalePrice"
	    else:
	        # Fall back to whatever OpenML declares as the first target.
	        target = bunch.target_names[0]

	    shortlist = [
	        "OverallQual",
	        "GrLivArea",
	        "GarageCars",
	        "GarageArea",
	        "TotalBsmtSF",
	        "FullBath",
	        "YearBuilt",
	        "Neighborhood",
	        "HouseStyle",
	        target,
	    ]
	    present = [name for name in shortlist if name in frame.columns]
	    renamed = frame[present].rename(columns={target: "sale_price"})
	    return renamed.dropna(subset=["sale_price"])


	@lru_cache(maxsize=1)
	def openml_adult_income() -> pd.DataFrame:
	    """Fetch the Adult income frame from OpenML (data_id=1590), cached after the first call.

	    The label column (``class``, else ``income``) is renamed to
	    ``income_gt_50k`` and rows without a label are dropped.
	    """
	    frame = fetch_openml(data_id=1590, as_frame=True, parser="auto").frame.copy()
	    # "class" takes precedence over "income" when both exist.
	    label_col = next((name for name in ("class", "income") if name in frame.columns), None)
	    if label_col is not None:
	        frame = frame.rename(columns={label_col: "income_gt_50k"})
	    return frame.dropna(subset=["income_gt_50k"])


	@lru_cache(maxsize=1)
	def sklearn_california_housing() -> pd.DataFrame:
	    """Load scikit-learn's California housing frame with the target renamed.

	    ``MedHouseVal`` becomes ``median_house_value``; the result is cached.
	    """
	    bunch = fetch_california_housing(as_frame=True)
	    return bunch.frame.rename(columns={"MedHouseVal": "median_house_value"})


	@lru_cache(maxsize=1)
	def fivethirtyeight_candy() -> pd.DataFrame:
	    """Download the candy CSV from CANDY_DATA_URL, cached after the first call.

	    The ``competitorname`` column is removed when present and rows without
	    ``winpercent`` are dropped.
	    """
	    raw = pd.read_csv(CANDY_DATA_URL)
	    removable = [name for name in ("competitorname",) if name in raw.columns]
	    trimmed = raw.drop(columns=removable)
	    return trimmed.dropna(subset=["winpercent"])


	@lru_cache(maxsize=1)
	def kaggle_credit_card_fraud() -> pd.DataFrame:
	    """Download the mlg-ulb/creditcardfraud CSV via kagglehub, cached after the first call.

	    The ``Class`` label is renamed to ``fraud`` when present; rows without
	    a ``fraud`` value are dropped.
	    """
	    import kagglehub

	    download_dir = kagglehub.dataset_download("mlg-ulb/creditcardfraud")
	    frame = pd.read_csv(find_first_data_file(download_dir, (".csv",)))
	    if "Class" in frame.columns:
	        frame = frame.rename(columns={"Class": "fraud"})
	    return frame.dropna(subset=["fraud"])


	@lru_cache(maxsize=1)
	def kaggle_epirecipes() -> pd.DataFrame:
	    """Download hugodarwood/epirecipes via kagglehub, cached after the first call.

	    A JSON file is preferred; if none is found the first CSV is read
	    instead. Only a fixed shortlist of columns is kept and rows without a
	    rating are dropped.

	    Raises:
	        ValueError: if the loaded data has no ``rating`` column.
	    """
	    import kagglehub

	    download_dir = kagglehub.dataset_download("hugodarwood/epirecipes")
	    try:
	        frame = pd.read_json(find_first_data_file(download_dir, (".json",)))
	    except FileNotFoundError:
	        frame = pd.read_csv(find_first_data_file(download_dir, (".csv",)))

	    if "rating" not in frame.columns:
	        raise ValueError("Epicurious dataset does not include a rating column.")

	    shortlist = (
	        "calories",
	        "protein",
	        "fat",
	        "sodium",
	        "dessert",
	        "dinner",
	        "breakfast",
	        "healthy",
	        "vegetarian",
	        "vegan",
	        "cakeweek",
	        "rating",
	    )
	    present = [name for name in shortlist if name in frame.columns]
	    return frame[present].dropna(subset=["rating"])


	@lru_cache(maxsize=1)
	def kaggle_calcofi() -> pd.DataFrame:
	    """Load the sohier/calcofi "bottle" CSV with normalised columns, cached.

	    The temperature column is renamed to ``water_temperature`` and the
	    salinity column to ``salinity``; rows without a temperature are dropped.
	    """
	    frame = normalize_columns(kaggle_csv("sohier/calcofi", ("bottle",)))
	    target = choose_col(frame, ["t_deg_c", "temperature"], ["t", "deg"])
	    salinity = choose_col(frame, ["salnty", "salinity"], ["sal"])

	    wanted = [salinity, "depthm", "o2ml_l", "sio3um", "no3um", "po4um", target]
	    present = [name for name in wanted if name in frame.columns]
	    renamed = frame[present].rename(columns={target: "water_temperature", salinity: "salinity"})
	    return renamed.dropna(subset=["water_temperature"])


	@lru_cache(maxsize=1)
	def kaggle_szeged_weather() -> pd.DataFrame:
	    """Load budincsevity/szeged-weather with normalised columns, cached.

	    The apparent-temperature column is renamed to ``apparent_temperature``
	    and rows without it are dropped.
	    """
	    frame = normalize_columns(kaggle_csv("budincsevity/szeged-weather"))
	    target = choose_col(frame, ["apparent_temperature_c", "apparent_temperature"], ["apparent", "temperature"])

	    wanted = [
	        "temperature_c",
	        "humidity",
	        "wind_speed_km_h",
	        "wind_bearing_degrees",
	        "visibility_km",
	        "pressure_millibars",
	        "summary",
	        "precip_type",
	        target,
	    ]
	    present = [name for name in wanted if name in frame.columns]
	    renamed = frame[present].rename(columns={target: "apparent_temperature"})
	    return renamed.dropna(subset=["apparent_temperature"])


	@lru_cache(maxsize=1)
	def kaggle_weather_ww2() -> pd.DataFrame:
	    """Load the smid80/weatherww2 "summary" CSV with normalised columns, cached.

	    The maximum-temperature column is renamed to ``max_temperature`` and
	    rows without it are dropped.
	    """
	    frame = normalize_columns(kaggle_csv("smid80/weatherww2", ("summary",)))
	    target = choose_col(frame, ["maxtemp", "max_temp", "max"], ["max"])

	    wanted = ["mintemp", "meantemp", "precip", "snowfall", "yr", "mo", "da", target]
	    present = [name for name in wanted if name in frame.columns]
	    renamed = frame[present].rename(columns={target: "max_temperature"})
	    return renamed.dropna(subset=["max_temperature"])


	@lru_cache(maxsize=1)
	def kaggle_montreal_bike_lanes() -> pd.DataFrame:
	    """Load pablomonleon/montreal-bike-lanes with normalised columns, cached.

	    The last numeric column is used as the target (renamed to
	    ``rider_count``); the first eight numeric columns are kept as features.

	    Raises:
	        ValueError: if fewer than two numeric columns are available.
	    """
	    frame = normalize_columns(kaggle_csv("pablomonleon/montreal-bike-lanes"))
	    numeric_names = frame.select_dtypes(include=np.number).columns.tolist()
	    if len(numeric_names) < 2:
	        raise ValueError("Montreal bike lanes dataset needs at least two numeric count columns.")

	    target = numeric_names[-1]
	    selected = numeric_names[:8]
	    # With more than eight numeric columns the target falls outside the slice.
	    if target not in selected:
	        selected.append(target)
	    renamed = frame[selected].rename(columns={target: "rider_count"})
	    return renamed.dropna(subset=["rider_count"])


	@lru_cache(maxsize=1)
	def kaggle_nyc_bike_crossings() -> pd.DataFrame:
	    """Load new-york-city/nyc-east-river-bicycle-crossings, normalised and cached.

	    The numeric "total" column is renamed to ``total_bike_crossings`` and
	    placed last; features come from the first eight numeric columns.
	    """
	    frame = normalize_columns(kaggle_csv("new-york-city/nyc-east-river-bicycle-crossings"))
	    numeric = frame.select_dtypes(include=np.number)
	    target = choose_col(numeric, ["total", "total_bicycle_count"], ["total"])

	    features = [name for name in numeric.columns[:8] if name != target]
	    renamed = frame[features + [target]].rename(columns={target: "total_bike_crossings"})
	    return renamed.dropna(subset=["total_bike_crossings"])


	@lru_cache(maxsize=1)
	def kaggle_uk_road_safety() -> pd.DataFrame:
	    """Load the bluehorseshoe/uk-2016-road-safety-data "accident" CSV, normalised and cached.

	    The casualty-count column is renamed to ``casualty_count`` and rows
	    without it are dropped.
	    """
	    frame = normalize_columns(kaggle_csv("bluehorseshoe/uk-2016-road-safety-data", ("accident",)))
	    target = choose_col(frame, ["number_of_casualties", "casualties"], ["casual"])

	    wanted = (
	        "number_of_vehicles",
	        "day_of_week",
	        "speed_limit",
	        "light_conditions",
	        "weather_conditions",
	        "road_surface_conditions",
	        "urban_or_rural_area",
	        target,
	    )
	    present = [name for name in wanted if name in frame.columns]
	    renamed = frame[present].rename(columns={target: "casualty_count"})
	    return renamed.dropna(subset=["casualty_count"])


	@lru_cache(maxsize=1)
	def kaggle_kcbs_bbq() -> pd.DataFrame:
	    """Load jaysobel/kcbs-bbq and derive a binary ``first_place`` target, cached.

	    The placement column is ``place``, else ``rank``, else the first
	    numeric column. ``first_place`` is 1 where that column equals 1 and 0
	    otherwise. Up to ten numeric and four non-numeric feature columns are
	    kept.

	    Raises:
	        ValueError: if no placement or numeric column exists.
	    """
	    frame = normalize_columns(kaggle_csv("jaysobel/kcbs-bbq"))
	    numeric_cols = frame.select_dtypes(include=np.number).columns.tolist()

	    if "place" in frame.columns:
	        target = "place"
	    elif "rank" in frame.columns:
	        target = "rank"
	    elif numeric_cols:
	        target = numeric_cols[0]
	    else:
	        raise ValueError("KCBS BBQ dataset has no placement or numeric column to derive first_place from.")

	    # The source column and the derived label are both excluded from the
	    # features; otherwise the label could be selected twice or leak.
	    excluded = {target, "first_place"}
	    feature_cols = [col for col in numeric_cols if col not in excluded][:10]
	    categorical_cols = [col for col in frame.columns if col not in numeric_cols and col not in excluded][:4]

	    out = frame.copy()
	    out["first_place"] = pd.to_numeric(out[target], errors="coerce").eq(1).astype(int)
	    # Flat label list: a nested list here is not a valid column selector.
	    return out[[*feature_cols, *categorical_cols, "first_place"]].dropna(subset=["first_place"])


	def load_iris_df(limit: int, seed: int) -> pd.DataFrame:
	    """Return up to *limit* sampled Iris rows with a string ``species`` target."""
	    bunch = load_iris(as_frame=True)
	    species_by_code = {code: label for code, label in enumerate(bunch.target_names)}
	    frame = bunch.frame.rename(columns={"target": "species"})
	    frame["species"] = frame["species"].map(species_by_code)
	    return frame.sample(min(limit, len(frame)), random_state=seed)


	def load_wine_df(limit: int, seed: int) -> pd.DataFrame:
	    """Return up to *limit* sampled Wine rows with the target named ``wine_class``."""
	    frame = load_wine(as_frame=True).frame.rename(columns={"target": "wine_class"})
	    row_count = min(limit, len(frame))
	    return frame.sample(row_count, random_state=seed)


	def load_breast_cancer_df(limit: int, seed: int) -> pd.DataFrame:
	    """Return up to *limit* sampled breast-cancer rows with a string ``diagnosis`` target."""
	    label_by_code = {0: "malignant", 1: "benign"}
	    frame = load_breast_cancer(as_frame=True).frame.rename(columns={"target": "diagnosis"})
	    frame["diagnosis"] = frame["diagnosis"].map(label_by_code)
	    row_count = min(limit, len(frame))
	    return frame.sample(row_count, random_state=seed)


	def load_digits_df(limit: int, seed: int) -> pd.DataFrame:
	    """Return up to *limit* sampled Digits rows with the target named ``digit``."""
	    frame = load_digits(as_frame=True).frame.rename(columns={"target": "digit"})
	    row_count = min(limit, len(frame))
	    return frame.sample(row_count, random_state=seed)


	def load_diabetes_df(limit: int, seed: int) -> pd.DataFrame:
	    """Return up to *limit* sampled Diabetes rows with the target named ``disease_progression``."""
	    frame = load_diabetes(as_frame=True).frame.rename(columns={"target": "disease_progression"})
	    row_count = min(limit, len(frame))
	    return frame.sample(row_count, random_state=seed)


	def load_california_housing_df(limit: int, seed: int) -> pd.DataFrame:
	    """Sample the California housing data, or a synthetic stand-in if loading fails."""
	    try:
	        real = sklearn_california_housing()
	        return sample_df(real, limit, seed)
	    except Exception:
	        # Best-effort fallback: any load failure yields synthetic data
	        # with the target renamed to match the real dataset.
	        synthetic = load_synthetic_housing_df(limit, seed)
	        return synthetic.rename(columns={"sale_price": "median_house_value"})


	def load_ames_housing_df(limit: int, seed: int) -> pd.DataFrame:
	    """Sample the Ames housing data, or synthetic housing data if loading fails."""
	    try:
	        real = openml_ames_housing()
	        return sample_df(real, limit, seed)
	    except Exception:
	        # Best-effort fallback: any load failure yields synthetic data.
	        return load_synthetic_housing_df(limit, seed)


	def load_synthetic_housing_df(limit: int, seed: int) -> pd.DataFrame:
	    """Generate a seeded synthetic housing-price table of at most 12000 rows.

	    The price combines size, bedrooms, age, a per-zipcode premium and
	    Gaussian noise. Random draws happen in a fixed order so a given seed
	    always yields the same frame.
	    """
	    zip_premium = {"94016": 260000, "98101": 140000, "10011": 210000, "60614": 80000, "78704": 110000, "30309": 70000}
	    rng = np.random.default_rng(seed)
	    n = min(limit, 12000)

	    bedrooms = rng.integers(1, 7, n)
	    sqft = rng.normal(1750, 650, n).clip(450, 5200)
	    home_age = rng.integers(0, 90, n)
	    zipcode = rng.choice(list(zip_premium), n)

	    price_per_sqft = rng.normal(230, 20, n)
	    sale_price = 120000 + sqft * price_per_sqft + bedrooms * 18000 - home_age * 1400
	    sale_price += pd.Series(zipcode).map(zip_premium).to_numpy()
	    sale_price += rng.normal(0, 45000, n)

	    # Drawn last, matching the position of this column in the output.
	    has_garage = rng.choice(["yes", "no"], n, p=[0.72, 0.28])

	    columns = {
	        "sqft": sqft.round(0),
	        "bedrooms": bedrooms,
	        "home_age": home_age,
	        "zipcode": zipcode,
	        "has_garage": has_garage,
	        "sale_price": sale_price.round(0),
	    }
	    return pd.DataFrame(columns)


	def load_titanic_proxy_df(limit: int, seed: int) -> pd.DataFrame:
	    """Generate a seeded synthetic Titanic-like table of at most 891 rows.

	    Survival is drawn from a logistic model over sex, class, age and fare.
	    Random draws happen in a fixed order so a given seed always yields the
	    same frame.
	    """
	    rng = np.random.default_rng(seed)
	    n = min(limit, 891)

	    sex = rng.choice(["female", "male"], n, p=[0.38, 0.62])
	    pclass = rng.choice([1, 2, 3], n, p=[0.24, 0.21, 0.55])
	    age = rng.normal(30, 14, n).clip(0.5, 78)
	    fare = np.exp(rng.normal(3.1, 0.85, n)) * (4 - pclass)
	    embarked = rng.choice(["S", "C", "Q"], n, p=[0.72, 0.19, 0.09])

	    logit = 1.6 * (sex == "female") + 0.9 * (pclass == 1) + 0.25 * (pclass == 2) - 0.025 * age + 0.01 * fare - 1.1
	    survival_prob = 1 / (1 + np.exp(-logit))
	    survived = rng.binomial(1, survival_prob)

	    # Family-size columns are drawn after the label, in output order.
	    sibsp = rng.integers(0, 5, n)
	    parch = rng.integers(0, 4, n)

	    columns = {
	        "pclass": pclass,
	        "sex": sex,
	        "age": age.round(1),
	        "sibsp": sibsp,
	        "parch": parch,
	        "fare": fare.round(2),
	        "embarked": embarked,
	        "survived": survived,
	    }
	    return pd.DataFrame(columns)


	def load_titanic_df(limit: int, seed: int) -> pd.DataFrame:
	    """Sample the OpenML Titanic data, or the synthetic proxy if loading fails."""
	    try:
	        real = openml_titanic()
	        return sample_df(real, limit, seed)
	    except Exception:
	        # Best-effort fallback: any load failure yields synthetic data.
	        return load_titanic_proxy_df(limit, seed)


	def load_credit_fraud_proxy_df(limit: int, seed: int) -> pd.DataFrame:
	    """Generate a seeded synthetic fraud table of at most 50000 rows.

	    Eighteen features ``v1``..``v18`` and the ``fraud`` label come from
	    ``make_classification`` with a 98.5/1.5 class weighting; ``amount``
	    and ``merchant_category`` are drawn independently.
	    """
	    rng = np.random.default_rng(seed)
	    n = min(limit, 50000)
	    features, labels = make_classification(
	        n_samples=n,
	        n_features=18,
	        n_informative=8,
	        n_redundant=4,
	        weights=[0.985, 0.015],
	        class_sep=1.6,
	        random_state=seed,
	    )
	    feature_names = [f"v{idx + 1}" for idx in range(18)]
	    frame = pd.DataFrame(features, columns=feature_names)
	    frame["amount"] = np.exp(rng.normal(3.2, 1.0, n)).round(2)
	    frame["merchant_category"] = rng.choice(["travel", "grocery", "electronics", "fuel", "cash"], n)
	    frame["fraud"] = labels
	    return frame


	def load_credit_fraud_df(limit: int, seed: int) -> pd.DataFrame:
	    """Sample the Kaggle credit-card fraud data, or the synthetic proxy if loading fails."""
	    try:
	        real = kaggle_credit_card_fraud()
	        return sample_df(real, limit, seed)
	    except Exception:
	        # Best-effort fallback: any load failure yields synthetic data.
	        return load_credit_fraud_proxy_df(limit, seed)


	def load_epirecipes_proxy_df(limit: int, seed: int) -> pd.DataFrame:
	    """Generate a seeded synthetic recipe-rating table of at most 20000 rows.

	    The rating depends on course, cuisine and sodium plus Gaussian noise
	    and is clipped to [0, 5]. Random draws happen in a fixed order so a
	    given seed always yields the same frame.
	    """
	    rng = np.random.default_rng(seed)
	    n = min(limit, 20000)

	    calories = rng.gamma(4, 120, n)
	    protein = rng.gamma(2, 12, n)
	    fat = rng.gamma(2.5, 9, n)
	    sodium = rng.gamma(2.4, 180, n)
	    course = rng.choice(["main", "dessert", "side", "salad", "breakfast"], n)
	    cuisine = rng.choice(["american", "italian", "mexican", "asian", "mediterranean"], n)

	    rating = 2.8 + 0.12 * (course == "dessert") + 0.18 * (cuisine == "italian") - 0.00035 * sodium + rng.normal(0, 0.65, n)

	    # Drawn after the rating noise, matching its position in the output.
	    make_again = rng.choice(["yes", "no"], n, p=[0.66, 0.34])

	    columns = {
	        "calories": calories.round(0),
	        "protein": protein.round(1),
	        "fat": fat.round(1),
	        "sodium": sodium.round(0),
	        "course": course,
	        "cuisine": cuisine,
	        "make_again": make_again,
	        "rating": rating.clip(0, 5).round(2),
	    }
	    return pd.DataFrame(columns)


	def load_epirecipes_df(limit: int, seed: int) -> pd.DataFrame:
	    """Sample the Kaggle Epicurious data, or the synthetic proxy if loading fails."""
	    try:
	        real = kaggle_epirecipes()
	        return sample_df(real, limit, seed)
	    except Exception:
	        # Best-effort fallback: any load failure yields synthetic data.
	        return load_epirecipes_proxy_df(limit, seed)


	def load_epirecipes_cakeweek_df(limit: int, seed: int) -> pd.DataFrame:
	    """Return Epicurious rows with an integer ``cakeweek`` target.

	    The real data is used when it loads and has a ``cakeweek`` column.
	    Otherwise the synthetic proxy is used and ``cakeweek`` is defined as
	    "dessert with a rating at or above the median".
	    """
	    try:
	        recipes = kaggle_epirecipes().copy()
	        if "cakeweek" not in recipes.columns:
	            raise KeyError("cakeweek")
	        flags = pd.to_numeric(recipes["cakeweek"], errors="coerce").fillna(0)
	        recipes["cakeweek"] = flags.astype(int)
	        return sample_df(recipes, limit, seed)
	    except Exception:
	        # Best-effort fallback: covers load failures and the missing-column
	        # KeyError raised above.
	        proxy = load_epirecipes_proxy_df(limit, seed).copy()
	        is_dessert = proxy["course"] == "dessert"
	        at_or_above_median = proxy["rating"] >= proxy["rating"].median()
	        proxy["cakeweek"] = (is_dessert & at_or_above_median).astype(int)
	        return proxy


	def load_candy_proxy_df(limit: int, seed: int) -> pd.DataFrame:
	    """Generate a seeded synthetic candy-ranking table of at most 1200 rows.

	    ``winpercent`` is a linear mix of chocolate, caramel, sugar and price
	    plus Gaussian noise, clipped to [5, 95]. Random draws happen in a
	    fixed order so a given seed always yields the same frame.
	    """
	    rng = np.random.default_rng(seed)
	    n = min(limit, 1200)

	    chocolate = rng.binomial(1, 0.45, n)
	    fruity = rng.binomial(1, 0.38, n)
	    caramel = rng.binomial(1, 0.24, n)
	    pricepercent = rng.beta(2, 4, n)
	    sugarpercent = rng.beta(3, 2, n)
	    winpercent = 35 + 18 * chocolate + 8 * caramel + 9 * sugarpercent - 10 * pricepercent + rng.normal(0, 8, n)

	    # Remaining binary attributes do not enter the target; they are drawn
	    # afterwards, in output order.
	    extra_rates = (
	        ("peanutyalmondy", 0.2),
	        ("nougat", 0.14),
	        ("crispedricewafer", 0.16),
	        ("hard", 0.28),
	        ("bar", 0.36),
	    )
	    extras = {name: rng.binomial(1, rate, n) for name, rate in extra_rates}

	    columns = {
	        "chocolate": chocolate,
	        "fruity": fruity,
	        "caramel": caramel,
	        **extras,
	        "sugarpercent": sugarpercent.round(3),
	        "pricepercent": pricepercent.round(3),
	        "winpercent": winpercent.clip(5, 95).round(2),
	    }
	    return pd.DataFrame(columns)


	def load_candy_df(limit: int, seed: int) -> pd.DataFrame:
	    """Sample the FiveThirtyEight candy data, or the synthetic proxy if loading fails."""
	    try:
	        real = fivethirtyeight_candy()
	        return sample_df(real, limit, seed)
	    except Exception:
	        # Best-effort fallback: any load failure yields synthetic data.
	        return load_candy_proxy_df(limit, seed)


	def load_candy_chocolate_df(limit: int, seed: int) -> pd.DataFrame:
	    """Return a copy of the candy data for the chocolate-classification task.

	    Raises:
	        gr.Error: if the loaded frame has no ``chocolate`` column.
	    """
	    candy = load_candy_df(limit, seed).copy()
	    if "chocolate" in candy.columns:
	        return candy
	    raise gr.Error("Candy dataset does not include the chocolate target.")


	def load_adult_income_proxy_df(limit: int, seed: int) -> pd.DataFrame:
	    """Generate a seeded synthetic adult-income table of at most 30000 rows.

	    The binary ``income_gt_50k`` label is drawn from a logistic model over
	    age, education, hours and occupation. Random draws happen in a fixed
	    order so a given seed always yields the same frame.
	    """
	    rng = np.random.default_rng(seed)
	    n = min(limit, 30000)

	    education_num = rng.integers(6, 17, n)
	    hours = rng.normal(40, 12, n).clip(1, 80)
	    age = rng.normal(39, 13, n).clip(18, 75)
	    occupation = rng.choice(["tech", "sales", "ops", "admin", "service", "exec"], n)

	    logit = -6 + 0.16 * age + 0.36 * education_num + 0.035 * hours + 0.9 * (occupation == "exec") + 0.55 * (occupation == "tech")
	    income_prob = 1 / (1 + np.exp(-logit))
	    income = rng.binomial(1, income_prob)

	    # Drawn after the label, matching its position in the output.
	    marital_status = rng.choice(["single", "married", "divorced"], n)

	    columns = {
	        "age": age.round(0),
	        "education_num": education_num,
	        "hours_per_week": hours.round(0),
	        "occupation": occupation,
	        "marital_status": marital_status,
	        "income_gt_50k": income,
	    }
	    return pd.DataFrame(columns)


	def load_adult_income_df(limit: int, seed: int) -> pd.DataFrame:
	    """Sample the OpenML Adult income data, or the synthetic proxy if loading fails."""
	    try:
	        real = openml_adult_income()
	        return sample_df(real, limit, seed)
	    except Exception:
	        # Best-effort fallback: any load failure yields synthetic data.
	        return load_adult_income_proxy_df(limit, seed)


	def load_bike_demand_proxy_df(limit: int, seed: int) -> pd.DataFrame:
	    """Generate a seeded synthetic bike-rental table of at most 15000 rows.

	    ``rental_count`` combines a commute-peak bonus (hours 7-9 and 16-18),
	    temperature, working-day and weather effects plus Gaussian noise. It
	    is floored at zero and rounded to whole rentals.

	    Args:
	        limit: Maximum number of rows to generate.
	        seed: Seed for the NumPy generator; the same seed gives the same frame.

	    Returns:
	        Frame with columns hour, temp, workingday, weather, rental_count.
	    """
	    rng = np.random.default_rng(seed)
	    n = min(limit, 15000)
	    hour = rng.integers(0, 24, n)
	    temp = rng.normal(21, 9, n).clip(-5, 40)
	    workingday = rng.binomial(1, 0.69, n)
	    weather = rng.choice(["clear", "mist", "rain", "storm"], n, p=[0.55, 0.28, 0.14, 0.03])

	    # Element-wise OR of the morning and evening commute windows.
	    morning_peak = (hour >= 7) & (hour <= 9)
	    evening_peak = (hour >= 16) & (hour <= 18)
	    commute_peak = morning_peak | evening_peak

	    count = 80 + 115 * commute_peak + 5.5 * temp + 45 * workingday - 75 * (weather == "rain") - 130 * (weather == "storm")
	    count += rng.normal(0, 45, n)
	    return pd.DataFrame(
	        {
	            "hour": hour,
	            "temp": temp.round(1),
	            "workingday": workingday,
	            "weather": weather,
	            "rental_count": count.clip(0).round(0),
	        }
	    )


	def load_calcofi_df(limit: int, seed: int) -> pd.DataFrame:
	    """Sample the Kaggle CalCOFI data, or seeded synthetic data if loading fails.

	    The synthetic fallback has at most 12000 rows with columns salinity,
	    depthm and water_temperature.
	    """
	    try:
	        real = kaggle_calcofi()
	        return sample_df(real, limit, seed)
	    except Exception:
	        # Best-effort fallback: any load failure yields synthetic data.
	        rng = np.random.default_rng(seed)
	        n = min(limit, 12000)
	        salinity = rng.normal(33.5, 0.7, n)
	        depth = rng.gamma(2.0, 40.0, n)
	        noise = rng.normal(0, 1.8, n)
	        water_temperature = 23 - 0.38 * depth / 10 - 1.7 * (salinity - 33.5) + noise
	        columns = {"salinity": salinity, "depthm": depth, "water_temperature": water_temperature}
	        return pd.DataFrame(columns)


	def load_szeged_weather_df(limit: int, seed: int) -> pd.DataFrame:
	    """Sample the Kaggle Szeged weather data, or seeded synthetic data if loading fails.

	    The synthetic fallback has at most 20000 rows with columns
	    temperature_c, humidity, wind_speed_km_h and apparent_temperature.
	    """
	    try:
	        real = kaggle_szeged_weather()
	        return sample_df(real, limit, seed)
	    except Exception:
	        # Best-effort fallback: any load failure yields synthetic data.
	        rng = np.random.default_rng(seed)
	        n = min(limit, 20000)
	        humidity = rng.beta(4, 2, n)
	        temperature = rng.normal(12, 10, n)
	        apparent = temperature - 5 * humidity + rng.normal(0, 2.5, n)
	        # Wind speed is drawn last to keep the random stream order fixed.
	        wind_speed = rng.gamma(2, 4, n)
	        columns = {
	            "temperature_c": temperature,
	            "humidity": humidity,
	            "wind_speed_km_h": wind_speed,
	            "apparent_temperature": apparent,
	        }
	        return pd.DataFrame(columns)


	def load_weather_ww2_df(limit: int, seed: int) -> pd.DataFrame:
	    """Sample the Kaggle WW2 weather data, or seeded synthetic data if loading fails.

	    The synthetic fallback has at most 15000 rows with columns mintemp,
	    precip and max_temperature.
	    """
	    try:
	        real = kaggle_weather_ww2()
	        return sample_df(real, limit, seed)
	    except Exception:
	        # Best-effort fallback: any load failure yields synthetic data.
	        rng = np.random.default_rng(seed)
	        n = min(limit, 15000)
	        mintemp = rng.normal(15, 9, n)
	        # Precipitation is drawn before the max-temperature offset.
	        precip = rng.gamma(1.5, 2, n)
	        max_temperature = mintemp + rng.normal(8, 3, n)
	        columns = {"mintemp": mintemp, "precip": precip, "max_temperature": max_temperature}
	        return pd.DataFrame(columns)


	def load_montreal_bike_lanes_df(limit: int, seed: int) -> pd.DataFrame:
	    """Sample the Kaggle Montreal bike-lane data, or the bike-demand proxy if loading fails."""
	    try:
	        real = kaggle_montreal_bike_lanes()
	        return sample_df(real, limit, seed)
	    except Exception:
	        # Best-effort fallback: reuse the synthetic bike-demand table with
	        # its target renamed to match this dataset.
	        proxy = load_bike_demand_proxy_df(limit, seed)
	        return proxy.rename(columns={"rental_count": "rider_count"})


	def load_nyc_bike_crossings_df(limit: int, seed: int) -> pd.DataFrame:
	    """Sample the Kaggle NYC bike-crossing data, or a derived proxy if loading fails.

	    The fallback derives two bridge counts as fixed fractions of the
	    synthetic rental count and uses that count as the total.
	    """
	    try:
	        real = kaggle_nyc_bike_crossings()
	        return sample_df(real, limit, seed)
	    except Exception:
	        # Best-effort fallback: any load failure yields synthetic data.
	        proxy = load_bike_demand_proxy_df(limit, seed)
	        rentals = proxy["rental_count"]
	        proxy["brooklyn_bridge"] = (rentals * 0.32).round()
	        proxy["manhattan_bridge"] = (rentals * 0.27).round()
	        proxy["total_bike_crossings"] = rentals
	        return proxy.drop(columns=["rental_count"])


	def load_uk_road_safety_df(limit: int, seed: int) -> pd.DataFrame:
	    """Sample the Kaggle UK road-safety data, or seeded synthetic data if loading fails.

	    The synthetic fallback has at most 30000 rows; casualties are Poisson
	    distributed with a rate that grows with vehicle count and speed
	    limits above 50.
	    """
	    try:
	        real = kaggle_uk_road_safety()
	        return sample_df(real, limit, seed)
	    except Exception:
	        # Best-effort fallback: any load failure yields synthetic data.
	        rng = np.random.default_rng(seed)
	        n = min(limit, 30000)
	        vehicles = rng.integers(1, 5, n)
	        speed = rng.choice([20, 30, 40, 50, 60, 70], n)
	        rate = 0.4 + vehicles * 0.28 + (speed > 50) * 0.25
	        casualties = rng.poisson(rate, n)
	        # Light conditions are drawn last to keep the random stream order fixed.
	        light = rng.choice(["daylight", "dark"], n)
	        columns = {
	            "number_of_vehicles": vehicles,
	            "speed_limit": speed,
	            "light_conditions": light,
	            "casualty_count": casualties,
	        }
	        return pd.DataFrame(columns)


	def load_kcbs_bbq_df(limit: int, seed: int) -> pd.DataFrame:
	    """Sample the Kaggle KCBS BBQ data, or seeded synthetic data if loading fails.

	    The synthetic fallback has at most 8000 rows; ``first_place`` is drawn
	    from a logistic function of the score.
	    """
	    try:
	        real = kaggle_kcbs_bbq()
	        return sample_df(real, limit, seed)
	    except Exception:
	        # Best-effort fallback: any load failure yields synthetic data.
	        rng = np.random.default_rng(seed)
	        n = min(limit, 8000)
	        score = rng.normal(165, 12, n)
	        win_prob = 1 / (1 + np.exp(-(score - 184) / 5))
	        first_place = rng.binomial(1, win_prob)
	        # Remaining columns are drawn after the label, in output order.
	        contest_size = rng.integers(10, 80, n)
	        category = rng.choice(["chicken", "ribs", "pork", "brisket"], n)
	        columns = {
	            "score": score,
	            "contest_size": contest_size,
	            "category": category,
	            "first_place": first_place,
	        }
	        return pd.DataFrame(columns)


	# Registry of every dataset the app offers. Each entry is built positionally as
	# DatasetSpec(name, task, target, source, rows, description, loader); the loader is
	# called as loader(limit, seed). List order is the order returned by dataset_names().
	DATASETS: list[DatasetSpec] = [
	DatasetSpec("Titanic Survival", "classification", "survived", "OpenML data_id=40945", 1309, "Mixed categorical/numeric binary classification.", load_titanic_df),
	DatasetSpec("Ames Housing Prices", "regression", "sale_price", "OpenML data_id=42165", 1460, "Ames real-estate regression with neighborhood and quality features.", load_ames_housing_df),
	DatasetSpec("California Housing", "regression", "median_house_value", "sklearn California housing", 20640, "Block-level California housing value regression.", load_california_housing_df),
	DatasetSpec("Credit Card Fraud", "classification", "fraud", "KaggleHub mlg-ulb/creditcardfraud", 284807, "Large imbalanced binary fraud task.", load_credit_fraud_df),
	DatasetSpec("Epicurious Recipes", "regression", "rating", "KaggleHub hugodarwood/epirecipes", 20000, "Recipe nutrition and tags to rating.", load_epirecipes_df),
	DatasetSpec("Halloween Candy", "regression", "winpercent", "FiveThirtyEight GitHub CSV", 85, "Candy attributes to popularity score.", load_candy_df),
	DatasetSpec("Candy Chocolate", "classification", "chocolate", "FiveThirtyEight GitHub CSV", 85, "Predict whether a candy is chocolate from other candy attributes.", load_candy_chocolate_df),
	DatasetSpec("Epicurious Cakeweek", "classification", "cakeweek", "KaggleHub hugodarwood/epirecipes", 20000, "Predict cakeweek recipes from nutrition and recipe tags.", load_epirecipes_cakeweek_df),
	DatasetSpec("CalCOFI Ocean Temperature", "regression", "water_temperature", "KaggleHub sohier/calcofi", 864863, "Predict ocean water temperature from salinity and chemistry readings.", load_calcofi_df),
	DatasetSpec("Szeged Apparent Temperature", "regression", "apparent_temperature", "KaggleHub budincsevity/szeged-weather", 96453, "Predict apparent temperature from humidity, wind, pressure, and weather.", load_szeged_weather_df),
	DatasetSpec("WW2 Max Temperature", "regression", "max_temperature", "KaggleHub smid80/weatherww2", 119040, "Predict daily maximum temperature from minimum temperature and weather fields.", load_weather_ww2_df),
	DatasetSpec("Montreal Bike Lane Counts", "regression", "rider_count", "KaggleHub pablomonleon/montreal-bike-lanes", 319, "Predict rider counts on one Montreal bike path from other paths.", load_montreal_bike_lanes_df),
	DatasetSpec("NYC Bike Crossings", "regression", "total_bike_crossings", "KaggleHub new-york-city/nyc-east-river-bicycle-crossings", 210, "Predict total East River bicycle crossings from bridge counts.", load_nyc_bike_crossings_df),
	DatasetSpec("UK Road Casualties", "regression", "casualty_count", "KaggleHub bluehorseshoe/uk-2016-road-safety-data", 136621, "Predict accident casualty count from road safety fields.", load_uk_road_safety_df),
	DatasetSpec("KCBS BBQ First Place", "classification", "first_place", "KaggleHub jaysobel/kcbs-bbq", 1559, "Predict whether a BBQ competition team wins first place.", load_kcbs_bbq_df),
	DatasetSpec("Adult Income", "classification", "income_gt_50k", "OpenML data_id=1590", 48842, "Demographic and work attributes to income bucket.", load_adult_income_df),
	DatasetSpec("Bike Demand", "regression", "rental_count", "Kaggle-style proxy", 15000, "Weather and time features to rental demand.", load_bike_demand_proxy_df),
	DatasetSpec("Iris", "classification", "species", "sklearn", 150, "Classic multi-class flower classification.", load_iris_df),
	DatasetSpec("Wine", "classification", "wine_class", "sklearn", 178, "Chemical analysis to cultivar class.", load_wine_df),
	DatasetSpec("Breast Cancer", "classification", "diagnosis", "sklearn", 569, "Diagnostic measurements to benign/malignant label.", load_breast_cancer_df),
	DatasetSpec("Digits", "classification", "digit", "sklearn", 1797, "Pixel features to handwritten digit class.", load_digits_df),
	DatasetSpec("Diabetes", "regression", "disease_progression", "sklearn", 442, "Clinical variables to disease progression.", load_diabetes_df),
	]


	def dataset_names() -> list[str]:
	    """Return the display names of all registered datasets, in registry order."""
	    names: list[str] = []
	    for spec in DATASETS:
	        names.append(spec.name)
	    return names


	def get_spec(name: str) -> DatasetSpec:
	    """Return the registered DatasetSpec whose ``name`` equals *name*.

	    The first match in registry order is returned.

	    Raises:
	        KeyError: if no dataset with that name is registered. An explicit
	            error is raised because a leaked StopIteration is silently
	            swallowed by enclosing iteration machinery and gives the
	            caller no hint about what went wrong.
	    """
	    for spec in DATASETS:
	        if spec.name == name:
	            return spec
	    known = [spec.name for spec in DATASETS]
	    raise KeyError(f"Unknown dataset {name!r}; expected one of {known}.")


	def get_dataset(name: str, sample_size: int, seed: int) -> pd.DataFrame:
	    """Load the named dataset via its registered loader and reset the row index."""
	    loader = get_spec(name).loader
	    frame = loader(sample_size, seed)
	    return frame.reset_index(drop=True)


	def split_xy(df: pd.DataFrame, target: str) -> tuple[pd.DataFrame, pd.Series]:
	    """Split *df* into a feature frame and the *target* series.

	    Columns that are entirely missing are discarded first.

	    Raises:
	        gr.Error: if the target column is absent or no features remain.
	    """
	    usable = df.dropna(axis=1, how="all").copy()
	    if target not in usable.columns:
	        raise gr.Error(f"Target column '{target}' was not found.")

	    features = usable.drop(columns=[target])
	    if features.empty:
	        raise gr.Error("Dataset must include at least one feature column.")
	    return features, usable[target]


	def coerce_numeric_target(y: pd.Series) -> pd.Series:
	    """Convert *y* to numbers, turning unparseable values into missing values.

	    Non-numeric input is first stringified, trimmed and stripped of commas
	    so values such as ``"1,000"`` parse.
	    """
	    already_numeric = y.dtype.kind in "ifu"
	    if not already_numeric:
	        y = y.astype("string").str.strip().str.replace(",", "", regex=False)
	    return pd.to_numeric(y, errors="coerce")


	def prepare_xy(df: pd.DataFrame, target: str, task: str | None) -> tuple[pd.DataFrame, pd.Series, str]:
	    """Split *df* into features and target and drop rows with an unusable target.

	    Args:
	        df: Input frame containing the target column.
	        target: Name of the target column.
	        task: "regression" or "classification"; inferred from the target
	            values when falsy.

	    Returns:
	        ``(x, y, task)`` where x and y share a fresh 0..n-1 index. For
	        regression y is numeric and finite; for classification y is the
	        raw target with missing values removed.

	    Raises:
	        gr.Error: if the target is missing, no features exist, or fewer
	            than two usable target values remain.
	    """
	    x, raw_y = split_xy(df, target)
	    inferred_task = task or infer_task(raw_y)
	    y = raw_y.copy()

	    if inferred_task == "regression":
	        y = coerce_numeric_target(y)
	        # Drop both unparseable values (NaN after coercion) and infinities.
	        valid_target = y.notna() & np.isfinite(y.to_numpy(dtype=float))
	        if valid_target.sum() < 2:
	            raise gr.Error("Regression target must contain at least two numeric values.")
	        dropped = len(y) - int(valid_target.sum())
	        x = x.loc[valid_target].reset_index(drop=True)
	        y = y.loc[valid_target].reset_index(drop=True)
	        if dropped and x.empty:
	            raise gr.Error("No usable rows remain after dropping non-numeric regression targets.")
	    else:
	        valid_target = raw_y.notna()
	        if valid_target.sum() < 2:
	            raise gr.Error("Classification target must contain at least two non-empty values.")
	        x = x.loc[valid_target].reset_index(drop=True)
	        y = raw_y.loc[valid_target].reset_index(drop=True)

	    if x.empty:
	        raise gr.Error("Dataset must include at least one feature column.")
	    return x, y, inferred_task


	def infer_task(y: pd.Series) -> str:
	    """Guess whether *y* describes a regression or a classification target.

	    Returns "regression" when the target is numeric, or at least 90%
	    parseable as numbers, and has more than 20 distinct values; otherwise
	    "classification".
	    """
	    if y.dtype.kind in "ifu" and y.nunique(dropna=True) > 20:
	        return "regression"

	    parsed = coerce_numeric_target(y)
	    present_count = y.notna().sum()
	    parsed_count = parsed.notna().sum()
	    # The truthiness check on present_count guards the division for an
	    # all-missing target.
	    if present_count and parsed_count / present_count >= 0.9 and parsed.nunique(dropna=True) > 20:
	        return "regression"
	    return "classification"


	def make_preprocessor(x: pd.DataFrame, scale_numeric: bool = False) -> ColumnTransformer:
	    """Build a ColumnTransformer matched to the column types of *x*.

	    Numeric columns are median-imputed and optionally standard-scaled.
	    All other columns are imputed with the most frequent value and one-hot
	    encoded (unknown categories ignored, at most 32 categories per
	    column). Columns in neither group are dropped.
	    """
	    numeric_cols = x.select_dtypes(include=np.number).columns.tolist()
	    categorical_cols = [col for col in x.columns if col not in numeric_cols]

	    transformers: list[tuple[str, object, list[str]]] = []

	    if numeric_cols:
	        numeric_steps: list[tuple[str, object]] = [("impute", SimpleImputer(strategy="median"))]
	        if scale_numeric:
	            numeric_steps.append(("scale", StandardScaler()))
	        transformers.append(("num", Pipeline(numeric_steps), numeric_cols))

	    if categorical_cols:
	        categorical_pipeline = Pipeline(
	            [
	                ("impute", SimpleImputer(strategy="most_frequent")),
	                ("encode", OneHotEncoder(handle_unknown="ignore", sparse_output=False, max_categories=32)),
	            ]
	        )
	        transformers.append(("cat", categorical_pipeline, categorical_cols))

	    return ColumnTransformer(transformers=transformers, remainder="drop", verbose_feature_names_out=False)


	def available_baselines(task: str) -> dict[str, object]:
	    """Return the baseline pipelines that can be benchmarked for ``task``.

	    Each value is a two-step ``Pipeline`` ("prep" followed by "model"). The
	    "prep" step is created from an empty frame, so it is only a placeholder;
	    ``rebuild_pipeline`` replaces it with one built from the training frame.

	    Args:
	        task: ``"classification"`` selects the classifiers; any other value
	            selects the regressors.

	    Returns:
	        Mapping of display name to unfitted pipeline, in insertion order.
	        "XGBoost" and "LightGBM" are present only when the package is
	        installed and imports cleanly.
	    """
	    # `import importlib` alone does not guarantee that the `util` submodule is bound.
	    import importlib.util

	    def pipe(estimator: object, scale: bool = False) -> Pipeline:
	        # Placeholder preprocessor; the second argument asks for scaled numerics.
	        prep = make_preprocessor(pd.DataFrame(), True) if scale else make_preprocessor(pd.DataFrame())
	        return Pipeline([("prep", prep), ("model", estimator)])

	    is_classification = task == "classification"
	    models: dict[str, object]
	    if is_classification:
	        models = {
	            "Logistic": pipe(LogisticRegression(max_iter=800), scale=True),
	            "RandomForest": pipe(RandomForestClassifier(n_estimators=80, min_samples_leaf=2, n_jobs=-1, random_state=RANDOM_STATE)),
	            "ExtraTrees": pipe(ExtraTreesClassifier(n_estimators=120, min_samples_leaf=2, n_jobs=-1, random_state=RANDOM_STATE)),
	            "GradientBoosting": pipe(GradientBoostingClassifier(n_estimators=100, learning_rate=0.06, random_state=RANDOM_STATE)),
	            "HistGradientBoosting": pipe(HistGradientBoostingClassifier(max_iter=120, random_state=RANDOM_STATE)),
	            "AdaBoost": pipe(AdaBoostClassifier(n_estimators=80, learning_rate=0.08, random_state=RANDOM_STATE)),
	            "NaiveBayes": pipe(GaussianNB(), scale=True),
	            "KNN": pipe(KNeighborsClassifier(n_neighbors=7), scale=True),
	            "SVC": pipe(SVC(C=1.0, probability=True, random_state=RANDOM_STATE), scale=True),
	            "Dummy": pipe(DummyClassifier(strategy="most_frequent")),
	        }
	    else:
	        models = {
	            "LinearRegression": pipe(LinearRegression(), scale=True),
	            "Ridge": pipe(Ridge(alpha=1.0), scale=True),
	            "BayesianRidge": pipe(BayesianRidge(), scale=True),
	            "RandomForest": pipe(RandomForestRegressor(n_estimators=80, min_samples_leaf=2, n_jobs=-1, random_state=RANDOM_STATE)),
	            "ExtraTrees": pipe(ExtraTreesRegressor(n_estimators=120, min_samples_leaf=2, n_jobs=-1, random_state=RANDOM_STATE)),
	            "GradientBoosting": pipe(GradientBoostingRegressor(n_estimators=100, learning_rate=0.06, random_state=RANDOM_STATE)),
	            "HistGradientBoosting": pipe(HistGradientBoostingRegressor(max_iter=120, random_state=RANDOM_STATE)),
	            "AdaBoost": pipe(AdaBoostRegressor(n_estimators=80, learning_rate=0.08, random_state=RANDOM_STATE)),
	            "KNN": pipe(KNeighborsRegressor(n_neighbors=7), scale=True),
	            "SVR": pipe(SVR(C=1.0), scale=True),
	            "Dummy": pipe(DummyRegressor(strategy="median")),
	        }

	    # Optional boosters are best-effort: a package that is installed but fails
	    # to import must not take the whole baseline catalogue down with it.
	    if importlib.util.find_spec("xgboost"):
	        try:
	            from xgboost import XGBClassifier, XGBRegressor

	            if is_classification:
	                booster = XGBClassifier(n_estimators=80, max_depth=4, learning_rate=0.08, eval_metric="logloss", random_state=RANDOM_STATE)
	            else:
	                booster = XGBRegressor(n_estimators=80, max_depth=4, learning_rate=0.08, random_state=RANDOM_STATE)
	            models["XGBoost"] = pipe(booster)
	        except Exception as exc:
	            print(f"XGBoost baseline unavailable: {type(exc).__name__}: {exc}")
	    if importlib.util.find_spec("lightgbm"):
	        try:
	            from lightgbm import LGBMClassifier, LGBMRegressor

	            if is_classification:
	                booster = LGBMClassifier(n_estimators=120, learning_rate=0.06, random_state=RANDOM_STATE, verbose=-1)
	            else:
	                booster = LGBMRegressor(n_estimators=120, learning_rate=0.06, random_state=RANDOM_STATE, verbose=-1)
	            models["LightGBM"] = pipe(booster)
	        except Exception as exc:
	            print(f"LightGBM baseline unavailable: {type(exc).__name__}: {exc}")
	    return models


	def rebuild_pipeline(model: Pipeline, x_train: pd.DataFrame) -> Pipeline:
	    """Return an unfitted clone of ``model`` with its first step rebuilt for ``x_train``.

	    The scaling preference is read back from the existing first step: it is
	    taken as "wants scaling" when that step has transformers and the text
	    form of its first transformer mentions "scale".
	    """
	    fresh = clone(model)
	    declared = fresh.steps[0][1].transformers
	    wants_scale = declared and "scale" in str(declared[0][1])
	    fresh.steps[0] = ("prep", make_preprocessor(x_train, scale_numeric=wants_scale))
	    return fresh


	@lru_cache(maxsize=2)
	def load_tabfm_model(task: str):
	    """Load the TabFM v1.0.0 PyTorch model for ``task``.

	    ``"classification"`` loads the classification model; every other value
	    loads the regression model. Results are memoised per ``task`` string
	    (two entries at most), and ``tabfm`` is imported only on first use.
	    """
	    from tabfm import tabfm_v1_0_0_pytorch

	    if task == "classification":
	        return tabfm_v1_0_0_pytorch.load(model_type="classification")
	    return tabfm_v1_0_0_pytorch.load(model_type="regression")


	def resolve_tabfm_params(
	    preset: str,
	    n_estimators: int,
	    max_num_rows: int,
	    max_num_features: int,
	    batch_size: int,
	    enable_nnls: bool,
	    n_feature_crosses: str,
	    n_svd_features: str,
	    max_eval_rows: int,
	) -> dict[str, object]:
	    """Turn the TabFM tuning controls into a parameter dictionary.

	    A ``preset`` found in ``TABFM_PRESETS`` wins outright and is returned as a
	    copy; the remaining arguments are used only for any other preset value.

	    In the custom case a row cap of zero or less becomes ``None``, the
	    string ``"0"`` for crosses/SVD becomes the integer ``0`` (other strings
	    pass through unchanged), and enabling NNLS clears ``max_num_rows``.
	    """
	    if preset in TABFM_PRESETS:
	        return dict(TABFM_PRESETS[preset])

	    def cap(value):
	        # Non-positive values mean "no cap".
	        return None if value <= 0 else int(value)

	    def zero_or_keyword(choice):
	        return 0 if choice == "0" else choice

	    context_rows = cap(max_num_rows)
	    params = {
	        "n_estimators": int(n_estimators),
	        "max_num_rows": context_rows,
	        "max_num_features": int(max_num_features),
	        "batch_size": int(batch_size),
	        "enable_nnls": bool(enable_nnls),
	        "n_feature_crosses": zero_or_keyword(n_feature_crosses),
	        "n_svd_features": zero_or_keyword(n_svd_features),
	        "max_eval_rows": cap(max_eval_rows),
	    }
	    if params["enable_nnls"]:
	        params["max_num_rows"] = None
	    return params


	def run_tabfm(task: str, x_train: pd.DataFrame, x_test: pd.DataFrame, y_train: pd.Series, tabfm_params: dict[str, object]):
	    """Fit a TabFM estimator on the training split and predict the test split.

	    Returns:
	        ``(pred, proba)``. ``proba`` is ``None`` unless the task is
	        classification and the estimator exposes ``predict_proba``.
	    """
	    from tabfm import TabFMClassifier, TabFMRegressor

	    is_classification = task == "classification"
	    foundation_model = load_tabfm_model(task)
	    wrapper_cls = TabFMClassifier if is_classification else TabFMRegressor
	    estimator = wrapper_cls(model=foundation_model, **tabfm_params)

	    estimator.fit(x_train, y_train.to_numpy())
	    pred = estimator.predict(x_test)

	    proba = None
	    if is_classification and hasattr(estimator, "predict_proba"):
	        proba = estimator.predict_proba(x_test)
	    return pred, proba


	def clean_tabfm_features(x_train: pd.DataFrame, x_test: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
	    """Produce gap-free train/test feature frames for TabFM.

	    Infinities are treated as missing. A column that is entirely missing in
	    both splits is dropped. The remaining columns are handled by the dtype
	    of the training column:

	    * bool: cast to float, missing filled with ``0.0``
	    * numeric: missing filled with the training median (``0.0`` if undefined)
	    * datetime: formatted as ``%Y-%m-%d %H:%M:%S`` text
	    * anything else: treated as numeric when at least 90% of the non-blank
	      training values parse via ``coerce_numeric_target``, otherwise kept as
	      stripped text

	    Missing text is written as ``"__missing__"``. Both frames come back with
	    a fresh 0..n-1 index.

	    Raises:
	        gr.Error: when no column survives.
	    """
	    missing_token = "__missing__"
	    timestamp_format = "%Y-%m-%d %H:%M:%S"
	    infinities = [np.inf, -np.inf]
	    dtypes = pd.api.types

	    def fill_from_train_median(train_values, test_values):
	        # The fill value always comes from the training split.
	        fill_value = train_values.median()
	        if pd.isna(fill_value):
	            fill_value = 0.0
	        return train_values.fillna(fill_value), test_values.fillna(fill_value)

	    train_out = pd.DataFrame(index=x_train.index)
	    test_out = pd.DataFrame(index=x_test.index)

	    for name in x_train.columns:
	        train_col = x_train[name].replace(infinities, np.nan)
	        test_col = x_test[name].replace(infinities, np.nan)
	        if train_col.isna().all() and test_col.isna().all():
	            continue

	        if dtypes.is_bool_dtype(train_col):
	            train_out[name] = train_col.astype("float64").fillna(0.0)
	            test_out[name] = test_col.astype("float64").fillna(0.0)
	            continue

	        if dtypes.is_numeric_dtype(train_col):
	            train_out[name], test_out[name] = fill_from_train_median(
	                pd.to_numeric(train_col, errors="coerce"),
	                pd.to_numeric(test_col, errors="coerce"),
	            )
	            continue

	        if dtypes.is_datetime64_any_dtype(train_col):
	            train_out[name] = train_col.dt.strftime(timestamp_format).fillna(missing_token)
	            test_out[name] = test_col.dt.strftime(timestamp_format).fillna(missing_token)
	            continue

	        # Object-like column: decide between "numbers stored as text" and real text.
	        train_text = train_col.astype("string").str.strip()
	        test_text = test_col.astype("string").str.strip()
	        train_parsed = coerce_numeric_target(train_col)
	        test_parsed = coerce_numeric_target(test_col)
	        filled_in = train_text.notna() & (train_text != "")
	        numeric_ratio = train_parsed.notna().sum() / filled_in.sum() if filled_in.sum() else 0
	        if numeric_ratio >= 0.9:
	            train_out[name], test_out[name] = fill_from_train_median(train_parsed, test_parsed)
	        else:
	            train_out[name] = train_text.fillna(missing_token).replace("", missing_token)
	            test_out[name] = test_text.fillna(missing_token).replace("", missing_token)

	    if train_out.empty:
	        raise gr.Error("TabFM needs at least one non-empty feature column.")

	    return train_out.reset_index(drop=True), test_out.reset_index(drop=True)


	def tabfm_failure_note(exc: Exception) -> str:
	    """Log a TabFM failure and return a user-facing note describing it.

	    The full traceback of ``exc`` is printed to stdout. Import and OS errors
	    are reported as a runtime/environment problem; everything else is
	    reported as a failure on the current dataset.

	    Args:
	        exc: The exception raised while running TabFM.

	    Returns:
	        A one-line Markdown message that embeds ``"<ExceptionType>: <message>"``.
	    """
	    detail = f"{type(exc).__name__}: {exc}"
	    # Format the exception that was passed in rather than whatever exception
	    # happens to be active, so the log is right outside an `except` block too.
	    trace = "".join(traceback.format_exception(type(exc), exc, exc.__traceback__))
	    print("TabFM failed:\n" + trace)
	    env_errors = (ImportError, ModuleNotFoundError, OSError)
	    if isinstance(exc, env_errors):
	        return f"TabFM did not run because the runtime could not load it: `{detail}`. On Spaces, keep Python 3.11 and allow the GitHub dependency plus model download for `{TABFM_MODEL_ID}`."
	    return f"TabFM failed while processing this dataset: `{detail}`."


	def score_predictions(task: str, y_true: pd.Series, pred, proba=None) -> dict[str, float]:
	    """Score predictions and attach the ``rank_score`` used for ordering.

	    Classification returns accuracy, weighted F1 and ROC AUC; AUC is NaN
	    unless probabilities are given and ``y_true`` has exactly two distinct
	    values. Its ``rank_score`` is the NaN-ignoring mean of those three.

	    Any other task returns RMSE, MAE and R2, with ``rank_score`` equal to
	    the negated RMSE so that a higher score is always better.
	    """
	    if task != "classification":
	        rmse = math.sqrt(mean_squared_error(y_true, pred))
	        return {
	            "rmse": rmse,
	            "mae": mean_absolute_error(y_true, pred),
	            "r2": r2_score(y_true, pred),
	            "rank_score": -rmse,
	        }

	    accuracy = accuracy_score(y_true, pred)
	    f1_weighted = f1_score(y_true, pred, average="weighted", zero_division=0)
	    roc_auc = np.nan
	    if proba is not None and len(np.unique(y_true)) == 2:
	        try:
	            # Column 1 holds the score of the second class.
	            roc_auc = roc_auc_score(y_true, proba[:, 1])
	        except Exception:
	            roc_auc = np.nan
	    return {
	        "accuracy": accuracy,
	        "f1_weighted": f1_weighted,
	        "roc_auc": roc_auc,
	        "rank_score": np.nanmean([accuracy, f1_weighted, roc_auc]),
	    }


	def benchmark_frame(
	    df: pd.DataFrame,
	    target: str,
	    task: str | None,
	    sample_size: int,
	    test_size: float,
	    seed: int,
	    selected_models: list[str],
	    include_tabfm: bool,
	    tabfm_params: dict[str, object] | None = None,
	) -> tuple[pd.DataFrame, pd.DataFrame, str]:
	    """Benchmark the selected baselines (and optionally TabFM) on one dataset.

	    The frame is down-sampled to at most ``sample_size`` rows, split into
	    train/test, and every selected model is fitted and scored on the test
	    split. A failing model becomes a leaderboard row with a failure status
	    instead of aborting the run.

	    Args:
	        df: Full dataset including the target column.
	        target: Name of the target column.
	        task: Task passed to ``prepare_xy``, which returns the task actually used.
	        sample_size: Maximum number of rows to benchmark on.
	        test_size: Fraction of rows held out for testing.
	        seed: Random state for sampling, splitting and TabFM evaluation sampling.
	        selected_models: Baseline names to run; ``None`` is treated as empty.
	        include_tabfm: Whether to run TabFM instead of listing it as skipped.
	        tabfm_params: TabFM settings; falls back to the "Fast" preset.

	    Returns:
	        ``(leaderboard, preview, summary)``: the ranked results table, the
	        first 12 held-out rows with their target, and a Markdown summary.

	    Raises:
	        gr.Error: when a classification target has fewer than two classes.
	    """
	    df = df.sample(min(sample_size, len(df)), random_state=seed).reset_index(drop=True)
	    x, y, task = prepare_xy(df, target, task)
	    if task == "classification" and y.nunique(dropna=True) < 2:
	        raise gr.Error("Classification needs at least two target classes.")
	    # Stratify only when every class has at least two rows.
	    stratify = y if task == "classification" and y.value_counts().min() >= 2 else None
	    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=test_size, random_state=seed, stratify=stratify)

	    rows: list[dict[str, object]] = []
	    notes: list[str] = []
	    selected_models = selected_models or []
	    models = available_baselines(task)
	    for name, model in models.items():
	        if name not in selected_models:
	            continue
	        start = time.perf_counter()
	        try:
	            pipe = rebuild_pipeline(model, x_train)
	            pipe.fit(x_train, y_train)
	            pred = pipe.predict(x_test)
	            proba = pipe.predict_proba(x_test) if task == "classification" and hasattr(pipe, "predict_proba") else None
	            metrics = score_predictions(task, y_test, pred, proba)
	            rows.append({"model": name, "status": "ok", "seconds": time.perf_counter() - start, **metrics})
	        except Exception as exc:
	            rows.append({"model": name, "status": f"failed: {exc}", "seconds": time.perf_counter() - start})
	    # Selected names that this task's catalogue does not offer still get a row.
	    for name in selected_models:
	        if name not in models:
	            rows.append({"model": name, "status": f"not compatible with {task} or unavailable", "seconds": 0.0, "rank_score": np.nan})

	    # Bound before the try block so the summary can report it even when the
	    # very first statement of the TabFM run raises.
	    tabfm_display_params: dict[str, object] = {}
	    if include_tabfm:
	        start = time.perf_counter()
	        try:
	            tabfm_params = tabfm_params or TABFM_PRESETS["Fast"]
	            tabfm_display_params = dict(tabfm_params)
	            tabfm_model_params = dict(tabfm_params)
	            # "max_eval_rows" is consumed here rather than passed on to run_tabfm.
	            tabfm_eval_rows = tabfm_model_params.pop("max_eval_rows", None)
	            if tabfm_eval_rows is not None and len(x_test) > int(tabfm_eval_rows):
	                eval_idx = x_test.sample(int(tabfm_eval_rows), random_state=seed).index
	                x_eval = x_test.loc[eval_idx]
	                y_eval = y_test.loc[eval_idx]
	                status = f"ok ({len(x_eval):,}/{len(x_test):,} test rows)"
	            else:
	                x_eval = x_test
	                y_eval = y_test
	                status = "ok"
	            x_tab_train, x_tab_eval = clean_tabfm_features(x_train, x_eval)
	            pred, proba = run_tabfm(task, x_tab_train, x_tab_eval, y_train, tabfm_model_params)
	            metrics = score_predictions(task, y_eval, pred, proba)
	            rows.append({"model": "TabFM", "status": status, "seconds": time.perf_counter() - start, **metrics})
	        except Exception as exc:
	            rows.append({"model": "TabFM", "status": f"unavailable: {exc}", "seconds": time.perf_counter() - start})
	            notes.append(tabfm_failure_note(exc))
	    else:
	        rows.append({"model": "TabFM", "status": "skipped - enable Run TabFM live", "seconds": 0.0, "rank_score": np.nan})
	        notes.append("TabFM is listed as skipped because Run TabFM live is off. Enable it to benchmark TabFM; the first run may download large model weights.")

	    results = pd.DataFrame(rows)
	    metric_cols = [c for c in ["accuracy", "f1_weighted", "roc_auc", "rmse", "mae", "r2", "rank_score", "seconds"] if c in results.columns]
	    if not results.empty and "rank_score" in results.columns:
	        results = results.sort_values("rank_score", ascending=False, na_position="last").reset_index(drop=True)
	    # The return statement always selects "rank", so it is added even when
	    # no row produced a rank_score (for example when every model failed).
	    results.insert(0, "rank", np.arange(1, len(results) + 1))
	    preview = pd.concat([x_test.reset_index(drop=True).head(12), y_test.reset_index(drop=True).head(12).rename(target)], axis=1)
	    summary = (
	        f"Task: {task} \n"
	        f"Rows used: {len(df):,} | Train: {len(x_train):,} | Test: {len(x_test):,} | Features: {x.shape[1]:,} \n"
	        f"Primary rank: {'higher accuracy/F1/AUC' if task == 'classification' else 'lower RMSE'}"
	    )
	    if notes:
	        summary += "\n\n" + "\n".join(f"- {note}" for note in notes)
	    if include_tabfm:
	        summary += f"\n\nTabFM params: `{tabfm_display_params}`"
	    return results[["rank", "model", "status", *metric_cols]], preview, summary


	def _radar_figure(clean: pd.DataFrame, metric_cols: list[str]) -> go.Figure:
	    """Draw one closed polar trace per model with each metric min-max scaled to [0, 1]."""
	    normalized = clean[["model", *metric_cols]].copy()
	    for metric in metric_cols:
	        values = pd.to_numeric(normalized[metric], errors="coerce")
	        lo, hi = values.min(), values.max()
	        if pd.isna(lo) or pd.isna(hi) or hi == lo:
	            # No spread to scale against: place every model mid-axis.
	            normalized[metric] = 0.5
	        elif metric in {"rmse", "mae", "seconds"}:
	            # Lower is better for these, so flip them to match the chart title.
	            normalized[metric] = 1 - ((values - lo) / (hi - lo))
	        else:
	            normalized[metric] = (values - lo) / (hi - lo)

	    fig = go.Figure()
	    # Repeat the first metric so each polygon closes.
	    theta = metric_cols + [metric_cols[0]]
	    # Colours follow row position, not the index label: labels can have gaps
	    # after dropna and need not be integers.
	    for position, (_, row) in enumerate(normalized.iterrows()):
	        values = [row[m] for m in metric_cols] + [row[metric_cols[0]]]
	        fig.add_trace(
	            go.Scatterpolar(
	                r=values,
	                theta=theta,
	                fill="toself",
	                name=str(row["model"]),
	                line=dict(color=GOOGLE_COLORS[position % len(GOOGLE_COLORS)], width=2),
	                opacity=0.78,
	            )
	        )
	    fig.update_layout(
	        template="plotly_white",
	        height=420,
	        margin=dict(l=35, r=35, t=35, b=25),
	        polar=dict(radialaxis=dict(visible=True, range=[0, 1])),
	        legend_title_text="Model",
	        title="Normalized metric shape (higher is better)",
	    )
	    return fig


	def _line_figure(clean: pd.DataFrame, metric_cols: list[str]) -> go.Figure:
	    """Draw one stacked subplot per metric with the models along a shared x axis."""
	    x_labels = clean["model"].astype(str).tolist()
	    fig = make_subplots(
	        rows=len(metric_cols),
	        cols=1,
	        shared_xaxes=True,
	        vertical_spacing=0.08,
	        subplot_titles=[metric.replace("_", " ").upper() for metric in metric_cols],
	    )
	    for idx, metric in enumerate(metric_cols, start=1):
	        fig.add_trace(
	            go.Scatter(
	                x=x_labels,
	                y=clean[metric],
	                mode="lines+markers",
	                name=metric,
	                line=dict(color=GOOGLE_COLORS[(idx - 1) % len(GOOGLE_COLORS)], width=3, shape="spline"),
	                marker=dict(size=9, line=dict(color="white", width=1.5)),
	                hovertemplate=f"<b>%{{x}}</b><br>{metric}: %{{y:.4f}}<extra></extra>",
	            ),
	            row=idx,
	            col=1,
	        )
	        if metric in {"accuracy", "f1_weighted", "roc_auc", "r2"}:
	            # Pin the top just above 1 and extend the bottom for negative values.
	            fig.update_yaxes(range=[min(0, float(clean[metric].min()) - 0.05), 1.02], row=idx, col=1)
	    fig.update_layout(
	        template="plotly_white",
	        height=max(320, 185 * len(metric_cols)),
	        margin=dict(l=30, r=20, t=40, b=35),
	        showlegend=False,
	        hovermode="x unified",
	    )
	    return fig


	def metric_chart(results: pd.DataFrame, selected_metrics: list[str] | None = None, chart_style: str = "Line") -> go.Figure:
	    """Plot the leaderboard metrics as a radar chart or as stacked line charts.

	    Args:
	        results: Leaderboard frame with a "model" column and metric columns.
	        selected_metrics: Metrics to show; defaults to ``METRIC_CHOICES``. If
	            none of them has data, every metric in ``METRIC_CHOICES`` that
	            has data is shown instead.
	        chart_style: ``"Radar"`` for the polar chart; anything else draws lines.

	    Returns:
	        The figure, or an empty ``go.Figure`` when there is nothing to plot.
	    """
	    if results is None or results.empty:
	        return go.Figure()
	    selected_metrics = selected_metrics or METRIC_CHOICES
	    metric_cols = [c for c in selected_metrics if c in results.columns and results[c].notna().any()]
	    if not metric_cols:
	        metric_cols = [c for c in METRIC_CHOICES if c in results.columns and results[c].notna().any()]
	    if not metric_cols:
	        return go.Figure()

	    clean = results.sort_values("rank") if "rank" in results.columns else results.copy()
	    # Rows without any of the plotted metrics are left out.
	    clean = clean.dropna(subset=metric_cols, how="all")
	    if clean.empty:
	        return go.Figure()
	    if chart_style == "Radar":
	        return _radar_figure(clean, metric_cols)
	    return _line_figure(clean, metric_cols)


	def bar_chart(results: pd.DataFrame, selected_metrics: list[str] | None = None) -> go.Figure:
	    """Draw a grouped bar chart with one bar per model and metric.

	    Metric selection matches ``metric_chart``: the requested metrics that
	    have data, else every metric in ``METRIC_CHOICES`` that has data. An
	    empty ``go.Figure`` is returned when there is nothing to plot.
	    """
	    if results is None or results.empty:
	        return go.Figure()
	    frame = results.copy()

	    def plottable(names):
	        return [name for name in names if name in frame.columns and frame[name].notna().any()]

	    metric_cols = plottable(selected_metrics or METRIC_CHOICES) or plottable(METRIC_CHOICES)
	    if not metric_cols:
	        return go.Figure()

	    ranked = frame.sort_values("rank") if "rank" in frame.columns else frame.copy()
	    ranked = ranked.dropna(subset=metric_cols, how="all")
	    if ranked.empty:
	        return go.Figure()

	    # Long format: one row per (model, metric) pair.
	    tidy = ranked.melt(id_vars=["model"], value_vars=metric_cols, var_name="metric", value_name="score")
	    fig = px.bar(
	        tidy,
	        x="model",
	        y="score",
	        color="metric",
	        barmode="group",
	        color_discrete_sequence=GOOGLE_COLORS,
	        title="Grouped metric comparison",
	    )
	    layout = dict(
	        template="plotly_white",
	        height=360,
	        margin=dict(l=25, r=20, t=45, b=35),
	        legend_title_text="Metric",
	        hovermode="x unified",
	    )
	    fig.update_layout(**layout)
	    return fig


	def time_chart(results: pd.DataFrame) -> go.Figure:
	    """Scatter each model against its runtime in seconds.

	    Rows whose status starts with "skipped" are left out. Marker size is the
	    absolute ``rank_score`` (missing counted as 0) with a floor of 0.1; when
	    the column is absent every marker gets size 1.
	    """
	    if results is None or results.empty or "seconds" not in results:
	        return go.Figure()
	    timed = results.copy()
	    if "status" in timed.columns:
	        was_skipped = timed["status"].astype(str).str.startswith("skipped")
	        timed = timed[~was_skipped]
	    if timed.empty:
	        return go.Figure()

	    rank_scores = timed.get("rank_score", pd.Series([1] * len(timed)))
	    marker_size = np.maximum(rank_scores.fillna(0).abs(), 0.1)
	    fig = px.scatter(
	        timed,
	        x="seconds",
	        y="model",
	        size=marker_size,
	        color="model",
	        color_discrete_sequence=px.colors.qualitative.Set2,
	    )
	    fig.update_layout(template="plotly_white", height=300, margin=dict(l=20, r=20, t=25, b=20), showlegend=False)
	    return fig


	def run_catalog(
	    dataset_name: str,
	    sample_size: int,
	    test_percent: int,
	    seed: int,
	    selected_models: list[str],
	    include_tabfm: bool,
	    selected_metrics: list[str],
	    chart_style: str,
	    tabfm_preset: str,
	    tabfm_n_estimators: int,
	    tabfm_max_rows: int,
	    tabfm_max_features: int,
	    tabfm_batch_size: int,
	    tabfm_enable_nnls: bool,
	    tabfm_crosses: str,
	    tabfm_svd: str,
	    tabfm_max_eval_rows: int,
	):
	    """Benchmark a catalog dataset and build every output for the Arena tab.

	    Returns:
	        ``(summary, leaderboard, metric figure, speed figure, bar figure,
	        preview)``. The leaderboard is rounded to four decimals; the figures
	        are drawn from the unrounded results.
	    """
	    spec = get_spec(dataset_name)
	    frame = get_dataset(dataset_name, sample_size, seed)
	    params = resolve_tabfm_params(
	        preset=tabfm_preset,
	        n_estimators=tabfm_n_estimators,
	        max_num_rows=tabfm_max_rows,
	        max_num_features=tabfm_max_features,
	        batch_size=tabfm_batch_size,
	        enable_nnls=tabfm_enable_nnls,
	        n_feature_crosses=tabfm_crosses,
	        n_svd_features=tabfm_svd,
	        max_eval_rows=tabfm_max_eval_rows,
	    )
	    # The slider supplies a percentage; benchmark_frame expects a fraction.
	    results, preview, summary = benchmark_frame(
	        frame,
	        spec.target,
	        spec.task,
	        sample_size,
	        test_percent / 100,
	        seed,
	        selected_models,
	        include_tabfm,
	        params,
	    )
	    leaderboard = results.round(4)
	    metric_fig = metric_chart(results, selected_metrics, chart_style)
	    speed_fig = time_chart(results)
	    bar_fig = bar_chart(results, selected_metrics)
	    return summary, leaderboard, metric_fig, speed_fig, bar_fig, preview


	def run_upload(
	    file,
	    target: str,
	    task: str,
	    sample_size: int,
	    test_percent: int,
	    seed: int,
	    selected_models: list[str],
	    include_tabfm: bool,
	    selected_metrics: list[str],
	    chart_style: str,
	    tabfm_preset: str,
	    tabfm_n_estimators: int,
	    tabfm_max_rows: int,
	    tabfm_max_features: int,
	    tabfm_batch_size: int,
	    tabfm_enable_nnls: bool,
	    tabfm_crosses: str,
	    tabfm_svd: str,
	    tabfm_max_eval_rows: int,
	):
	    """Benchmark an uploaded CSV and build every output for the Upload tab.

	    Args:
	        file: The uploaded CSV, either as a path string or as an object that
	            exposes the path as ``.name``.
	        target: Name of the target column.
	        task: ``"Auto"`` lets ``benchmark_frame`` decide; any other label is
	            lower-cased and passed through.

	    Returns:
	        ``(summary, leaderboard, metric figure, speed figure, bar figure,
	        preview)``. The leaderboard is rounded to four decimals; the figures
	        are drawn from the unrounded results.

	    Raises:
	        gr.Error: when no file has been uploaded.
	    """
	    if file is None:
	        raise gr.Error("Upload a CSV file first.")
	    # NOTE(review): which form gr.File delivers is assumed to depend on the
	    # Gradio version and the component's `type`; accept both - confirm.
	    csv_path = getattr(file, "name", file)
	    df = pd.read_csv(csv_path)
	    selected_task = None if task == "Auto" else task.lower()
	    tabfm_params = resolve_tabfm_params(tabfm_preset, tabfm_n_estimators, tabfm_max_rows, tabfm_max_features, tabfm_batch_size, tabfm_enable_nnls, tabfm_crosses, tabfm_svd, tabfm_max_eval_rows)
	    # The slider supplies a percentage; benchmark_frame expects a fraction.
	    results, preview, summary = benchmark_frame(df, target, selected_task, sample_size, test_percent / 100, seed, selected_models, include_tabfm, tabfm_params)
	    return summary, results.round(4), metric_chart(results, selected_metrics, chart_style), time_chart(results), bar_chart(results, selected_metrics), preview


	def redraw_metric_chart(results: pd.DataFrame, selected_metrics: list[str], chart_style: str):
	    """Redraw the metric chart from the current leaderboard without re-running anything.

	    Returns an empty figure while the leaderboard is missing or has no rows.
	    """
	    has_rows = results is not None and len(results) > 0
	    if not has_rows:
	        return go.Figure()
	    leaderboard = pd.DataFrame(results)
	    return metric_chart(leaderboard, selected_metrics, chart_style)


	def redraw_bar_chart(results: pd.DataFrame, selected_metrics: list[str]):
	    """Redraw the grouped bar chart from the current leaderboard without re-running anything.

	    Returns an empty figure while the leaderboard is missing or has no rows.
	    """
	    has_rows = results is not None and len(results) > 0
	    if not has_rows:
	        return go.Figure()
	    leaderboard = pd.DataFrame(results)
	    return bar_chart(leaderboard, selected_metrics)


	def catalog_table() -> pd.DataFrame:
	    """Tabulate ``DATASETS`` with one row per entry for the catalog tab."""
	    records = []
	    for entry in DATASETS:
	        records.append(
	            {
	                "dataset": entry.name,
	                "task": entry.task,
	                "target": entry.target,
	                "rows": entry.rows,
	                "source": entry.source,
	                "description": entry.description,
	            }
	        )
	    return pd.DataFrame(records)


	# Every baseline name offered in the "Models" dropdowns. The list mixes
	# classifier-only and regressor-only names; benchmark_frame reports a
	# selected name that the current task does not offer as "not compatible".
	DEFAULT_MODELS = [
	    # linear and probabilistic models
	    "Logistic", "LinearRegression", "Ridge", "BayesianRidge", "NaiveBayes",
	    # tree ensembles
	    "RandomForest", "ExtraTrees", "GradientBoosting", "HistGradientBoosting", "AdaBoost",
	    # neighbours and support-vector machines
	    "KNN", "SVC", "SVR",
	    # optional third-party boosters
	    "XGBoost", "LightGBM",
	    # trivial reference model
	    "Dummy",
	]
	# Names pre-selected in the "Models" dropdowns.
	DEFAULT_SELECTED_MODELS = [
	    "HistGradientBoosting",
	    "XGBoost",
	    "LightGBM",
	    "Dummy",
	]


	def _benchmark_controls() -> tuple[list, gr.Dropdown, gr.Radio]:
	    """Create the run controls that both benchmark tabs share.

	    Must be called inside the layout context that should hold the controls.

	    Returns:
	        ``(controls, metric_toggles, chart_style)``. ``controls`` lists every
	        created input in the positional order the run functions expect after
	        their tab-specific leading arguments; the other two are returned
	        separately because the chart redraw events need them.
	    """
	    sample = gr.Slider(100, 50000, value=1000, step=100, label="Sample size")
	    test_pct = gr.Slider(10, 40, value=25, step=5, label="Test split (%)")
	    seed = gr.Number(value=42, precision=0, label="Random seed")
	    models = gr.Dropdown(DEFAULT_MODELS, value=DEFAULT_SELECTED_MODELS, multiselect=True, label="Models")
	    include_tabfm = gr.Checkbox(value=False, label="Run TabFM live (adds TabFM row)")
	    metric_toggles = gr.Dropdown(METRIC_CHOICES, value=["rmse", "mae", "r2", "accuracy", "f1_weighted", "roc_auc"], multiselect=True, label="Chart metrics")
	    chart_style = gr.Radio(["Line", "Radar"], value="Line", label="Chart style")
	    with gr.Accordion("TabFM tuning", open=False):
	        tabfm_preset = gr.Dropdown(list(TABFM_PRESETS.keys()) + ["Custom"], value="Fast", label="Preset")
	        tabfm_n_estimators = gr.Slider(1, 32, value=1, step=1, label="Estimators")
	        tabfm_max_rows = gr.Slider(0, 5000, value=256, step=64, label="Max context rows (0 = no cap)")
	        tabfm_max_features = gr.Slider(8, 500, value=64, step=8, label="Max features")
	        tabfm_batch_size = gr.Slider(1, 8, value=1, step=1, label="Batch size")
	        tabfm_enable_nnls = gr.Checkbox(value=False, label="NNLS ensemble weights")
	        tabfm_crosses = gr.Radio(["0", "sqrt"], value="0", label="Feature crosses")
	        tabfm_svd = gr.Radio(["0", "sqrt"], value="0", label="SVD features")
	        tabfm_max_eval_rows = gr.Slider(0, 5000, value=256, step=64, label="Max TabFM test rows (0 = no cap)")
	    controls = [
	        sample,
	        test_pct,
	        seed,
	        models,
	        include_tabfm,
	        metric_toggles,
	        chart_style,
	        tabfm_preset,
	        tabfm_n_estimators,
	        tabfm_max_rows,
	        tabfm_max_features,
	        tabfm_batch_size,
	        tabfm_enable_nnls,
	        tabfm_crosses,
	        tabfm_svd,
	        tabfm_max_eval_rows,
	    ]
	    return controls, metric_toggles, chart_style


	def _results_column(leaderboard_label: str) -> list:
	    """Create the results column and return its components in run-function output order."""
	    with gr.Column(scale=3):
	        summary = gr.Markdown()
	        leaderboard = gr.Dataframe(label=leaderboard_label, interactive=False)
	        bars = gr.Plot(label="Main grouped comparison")
	        with gr.Row():
	            chart = gr.Plot(label="Metric comparison")
	            speed = gr.Plot(label="Speed")
	        preview = gr.Dataframe(label="Held-out preview", interactive=False)
	    return [summary, leaderboard, chart, speed, bars, preview]


	def _wire_benchmark_events(run_fn, button, inputs: list, outputs: list, metric_toggles, chart_style) -> None:
	    """Connect the run button and the chart redraw events of one benchmark tab."""
	    # Positions follow the list returned by _results_column.
	    leaderboard, chart, bars = outputs[1], outputs[2], outputs[4]
	    button.click(run_fn, inputs, outputs)
	    # Chart option changes redraw from the displayed leaderboard only.
	    metric_toggles.change(redraw_metric_chart, [leaderboard, metric_toggles, chart_style], chart)
	    metric_toggles.change(redraw_bar_chart, [leaderboard, metric_toggles], bars)
	    chart_style.change(redraw_metric_chart, [leaderboard, metric_toggles, chart_style], chart)


	def build_app() -> gr.Blocks:
	    """Assemble the tabBench Gradio interface.

	    The app has four tabs: "Arena" (catalog datasets, also run once on page
	    load), "Upload Dataset" (user CSV), "Dataset Catalog" and
	    "Implementation Notes".

	    Returns:
	        The ``gr.Blocks`` app, not yet launched.
	    """
	    css = """
	body, .gradio-container { background: #f7f9fd; color: #101828; }
	.shell { max-width: 1440px; margin: 0 auto; }
	.hero { background: linear-gradient(135deg, #ffffff 0%, #f6f9ff 56%, #fff7ed 100%); border: 1px solid #e9edf5; border-radius: 18px; padding: 26px 28px; box-shadow: 0 20px 55px rgba(15, 23, 42, 0.07); }
	.hero h1 { font-size: 36px; line-height: 1.05; margin: 0 0 8px; letter-spacing: 0; }
	.hero p { margin: 0; color: #667085; font-size: 15px; }
	.stat-card { background: #fff; border: 1px solid #edf0f5; border-radius: 14px; padding: 18px; box-shadow: 0 12px 35px rgba(15, 23, 42, 0.05); min-height: 118px; }
	.stat-card .label { color: #667085; font-size: 13px; }
	.stat-card .value { font-size: 28px; font-weight: 760; margin-top: 12px; }
	.stat-card .trend { display: inline-block; margin-left: 8px; font-size: 12px; color: #027a48; background: #ecfdf3; border-radius: 999px; padding: 2px 8px; }
	.panel { background: #fff; border: 1px solid #edf0f5; border-radius: 14px; padding: 14px; box-shadow: 0 12px 35px rgba(15, 23, 42, 0.04); }
	.control-panel { background: linear-gradient(180deg, #ffffff 0%, #fbfcff 100%); border-top: 4px solid #f97316; }
	.control-panel label span { color: #344054; font-weight: 720; }
	.control-panel input, .control-panel textarea, .control-panel select { border-radius: 10px !important; }
	.control-panel .wrap { gap: 9px !important; }
	.control-panel .token, .control-panel [data-testid="token"] { background: #fff7ed !important; color: #c2410c !important; border: 1px solid #fed7aa !important; border-radius: 999px !important; }
	.control-panel .checkbox label { border-radius: 999px !important; }
	.gr-button-primary { background: linear-gradient(135deg, #f97316, #ea4335) !important; border-color: #f97316 !important; box-shadow: 0 12px 24px rgba(249, 115, 22, 0.25) !important; border-radius: 12px !important; min-height: 46px !important; font-weight: 760 !important; }
	.plot-container, .table-wrap { border-radius: 14px !important; overflow: hidden; }
	footer { display: none !important; }
	"""
	    with gr.Blocks(title=APP_TITLE, css=css, theme=gr.themes.Soft(primary_hue="orange", secondary_hue="violet")) as demo:
	        with gr.Column(elem_classes=["shell"]):
	            gr.HTML(
	                """
	<div class="hero">
	<h1>tabBench</h1>
	<p>A clean arena for benchmarking <strong>google/tabfm-1.0.0-pytorch</strong> against practical tabular baselines across small, classic, imbalanced, and user-uploaded datasets.</p>
	</div>
	"""
	            )
	            with gr.Row():
	                gr.HTML(f'<div class="stat-card"><div class="label">Benchmark catalog</div><div class="value">{len(DATASETS)} <span class="trend">mixed tasks</span></div><div class="label">Classification + regression</div></div>')
	                gr.HTML('<div class="stat-card"><div class="label">Linked HF model</div><div class="value">TabFM <span class="trend">1.0</span></div><div class="label">google/tabfm-1.0.0-pytorch</div></div>')
	                gr.HTML('<div class="stat-card"><div class="label">User datasets</div><div class="value">CSV <span class="trend">upload</span></div><div class="label">Pick target, task, sample size</div></div>')
	            with gr.Tabs():
	                with gr.Tab("Arena"):
	                    with gr.Row():
	                        with gr.Column(scale=1, elem_classes=["panel", "control-panel"]):
	                            dataset = gr.Dropdown(dataset_names(), value="Titanic Survival", label="Dataset")
	                            shared_inputs, metric_toggles, chart_style = _benchmark_controls()
	                            run_btn = gr.Button("Run benchmark", variant="primary")
	                        run_outputs = _results_column("Leaderboard")
	                    run_inputs = [dataset, *shared_inputs]
	                    _wire_benchmark_events(run_catalog, run_btn, run_inputs, run_outputs, metric_toggles, chart_style)
	                    # Run the default benchmark once when the page loads.
	                    demo.load(run_catalog, run_inputs, run_outputs)
	                with gr.Tab("Upload Dataset"):
	                    with gr.Row():
	                        with gr.Column(scale=1, elem_classes=["panel", "control-panel"]):
	                            file = gr.File(label="CSV file", file_types=[".csv"])
	                            target = gr.Textbox(label="Target column")
	                            task = gr.Radio(["Auto", "Classification", "Regression"], value="Auto", label="Task")
	                            upload_shared, upload_metric_toggles, upload_chart_style = _benchmark_controls()
	                            upload_btn = gr.Button("Run uploaded dataset", variant="primary")
	                        upload_outputs = _results_column("Upload leaderboard")
	                    upload_inputs = [file, target, task, *upload_shared]
	                    _wire_benchmark_events(run_upload, upload_btn, upload_inputs, upload_outputs, upload_metric_toggles, upload_chart_style)
	                with gr.Tab("Dataset Catalog"):
	                    gr.Dataframe(catalog_table(), interactive=False, label="Included benchmark catalog")
	                    gr.Markdown(
	                        """
	Most catalog datasets are loaded from OpenML, KaggleHub, FiveThirtyEight GitHub data, or sklearn. Each remote loader has a fallback so the Space remains usable if an upstream dataset is temporarily unavailable.
	"""
	                    )
	                with gr.Tab("Implementation Notes"):
	                    gr.Markdown(
	                        """
	This Space declares `models: google/tabfm-1.0.0-pytorch` in its README metadata, which is what Hugging Face uses to associate Spaces with model pages.

	TabFM is attempted only when Run TabFM live is enabled because the first run downloads large model weights and CPU Basic inference can be slow. Use the Fast preset for a quick smoke test, then increase estimators/context rows for stronger but slower runs.

	The TabFM integration follows the Google Research README pattern: load `tabfm_v1_0_0_pytorch`, wrap it with `TabFMClassifier` or `TabFMRegressor`, call `fit` for context preparation, then `predict`.
	"""
	                    )
	    return demo


	if __name__ == "__main__":
	    # Script entry point: build the UI, enable a request queue capped at 16, then serve.
	    app = build_app()
	    queued_app = app.queue(max_size=16)
	    queued_app.launch()