Spaces:

HF-Pawan
/

Supervised-Learning-Model-Trainer

Running

Supervised-Learning-Model-Trainer / core /training.py

anyonehomep1mane

Latest Code Changes and Bug Fixes

4928a1a 19 days ago

3.6 kB

	import pandas as pd
	from sklearn.pipeline import Pipeline
	from sklearn.model_selection import train_test_split
	from sklearn.preprocessing import LabelEncoder

	from core.detection import detect_target_type
	from models.registry import REGRESSION_MODELS, CLASSIFICATION_MODELS
	from preprocessing.transformers import build_preprocessor
	from utils.metrics import regression_metrics, classification_metrics
	from core.visuals import regression_graphs, classification_graphs
	from models.registry import MODEL_GROUPS

	from sklearn.compose import ColumnTransformer
	from sklearn.pipeline import Pipeline
	from sklearn.preprocessing import OneHotEncoder, StandardScaler
	from sklearn.impute import SimpleImputer
	from sklearn.model_selection import train_test_split
	import numpy as np


	def build_preprocessor(df):
	    """Split *df* into features/target and build an unfitted preprocessor.

	    The last column of ``df`` is taken as the target and every column before
	    it as a candidate feature. Feature columns are routed by dtype:

	    * ``int64`` / ``float64`` columns: median imputation, then standard
	      scaling.
	    * ``object`` / ``category`` / ``bool`` columns: most-frequent imputation,
	      then dense one-hot encoding that ignores categories unseen at fit time.

	    Feature columns of any other dtype are dropped by the transformer
	    (``remainder="drop"``).

	    NOTE(review): this definition shadows the ``build_preprocessor`` imported
	    from ``preprocessing.transformers`` at the top of the file - confirm which
	    one is meant to be used.

	    Args:
	        df: pandas DataFrame whose final column is the prediction target.

	    Returns:
	        tuple: ``(X, y, preprocessor)`` - the feature frame, the target
	        column, and an unfitted ``ColumnTransformer``.

	    Raises:
	        ValueError: If ``df`` has no columns at all, or if none of the
	            feature columns has one of the supported dtypes.
	    """
	    if df.shape[1] == 0:
	        # Without this guard ``df.iloc[:, -1]`` raises IndexError, which the
	        # ``except ValueError`` handler in train_model would not catch.
	        raise ValueError("Dataset has no columns")

	    X = df.iloc[:, :-1]
	    y = df.iloc[:, -1]

	    num_cols = X.select_dtypes(include=["int64", "float64"]).columns.tolist()
	    cat_cols = X.select_dtypes(
	        include=["object", "category", "bool"]
	    ).columns.tolist()

	    if not num_cols and not cat_cols:
	        raise ValueError("No usable feature columns found")

	    numeric_pipeline = Pipeline([
	        ("imputer", SimpleImputer(strategy="median")),
	        ("scaler", StandardScaler()),
	    ])

	    categorical_pipeline = Pipeline([
	        ("imputer", SimpleImputer(strategy="most_frequent")),
	        ("encoder", OneHotEncoder(handle_unknown="ignore", sparse_output=False)),
	    ])

	    preprocessor = ColumnTransformer(
	        transformers=[
	            ("num", numeric_pipeline, num_cols),
	            ("cat", categorical_pipeline, cat_cols),
	        ],
	        remainder="drop",
	    )

	    return X, y, preprocessor

	def build_pipeline(model, preprocessor):
	    """Chain *preprocessor* and *model* into a single two-step Pipeline.

	    The steps are registered under the names ``"preprocessor"`` and
	    ``"model"``, in that order.
	    """
	    steps = [
	        ("preprocessor", preprocessor),
	        ("model", model),
	    ]
	    return Pipeline(steps)


	def _error_result(message):
	    """Return the ``(error_frame, None)`` pair used to report a failure."""
	    return pd.DataFrame({"Error": [message]}), None


	def _split_train_test(X, y):
	    """Hold out 20% of the rows, stratifying on *y* when that is feasible."""
	    split_kwargs = {"test_size": 0.2, "random_state": 42}
	    # Targets with fewer than 20 distinct values are split with stratification.
	    stratify = y if len(np.unique(y)) < 20 else None
	    try:
	        return train_test_split(X, y, stratify=stratify, **split_kwargs)
	    except ValueError:
	        if stratify is None:
	            raise
	        # Stratification can be impossible (e.g. a target value that occurs
	        # only once); retry with a plain shuffle split instead of failing
	        # the whole training run.
	        return train_test_split(X, y, **split_kwargs)


	def train_model(file, task_type, model_group, model_name, graph_type):
	    """Train the selected model on an uploaded CSV and report its metrics.

	    The CSV's last column is used as the target (see ``build_preprocessor``).
	    The task detected from the target must match ``task_type``; otherwise an
	    error frame is returned and no model is trained.

	    Args:
	        file: Uploaded file object whose ``.name`` is the CSV path passed to
	            ``pd.read_csv``, or ``None`` when nothing was uploaded.
	        task_type: Task chosen by the caller. ``"Regression"`` selects the
	            regression metrics/graphs; any other value takes the
	            classification branch.
	        model_group: First-level key into ``MODEL_GROUPS``.
	        model_name: Key of the model under
	            ``MODEL_GROUPS[model_group][task_type]``.
	        graph_type: Forwarded unchanged to ``regression_graphs`` or
	            ``classification_graphs``.

	    Returns:
	        tuple: ``(metrics_df, fig)`` on success, where ``metrics_df`` has the
	        columns ``"Metric"`` and ``"Value"``. On failure the first element
	        is a one-column ``"Error"`` frame and the second is ``None``.
	    """
	    if file is None:
	        return _error_result("Please upload a csv file first.")

	    try:
	        df = pd.read_csv(file.name)

	        X, y, preprocessor = build_preprocessor(df)

	        detected_task = detect_target_type(y)
	        if task_type != detected_task:
	            return _error_result(
	                f"Detected {detected_task} target, but {task_type} selected."
	            )

	        # String class labels are mapped to integer codes before training.
	        if task_type == "Classification" and y.dtype == "object":
	            y = LabelEncoder().fit_transform(y)

	        try:
	            model = MODEL_GROUPS[model_group][task_type][model_name]
	        except KeyError:
	            # Report an unknown selection the same way as every other
	            # user-facing failure instead of letting KeyError escape.
	            return _error_result(
	                f"Model '{model_name}' is not available for "
	                f"{model_group} / {task_type}."
	            )

	        X_train, X_test, y_train, y_test = _split_train_test(X, y)

	        pipeline = build_pipeline(model, preprocessor)
	        pipeline.fit(X_train, y_train)
	        preds = pipeline.predict(X_test)

	        if task_type == "Regression":
	            metrics = regression_metrics(y_test, preds)
	            fig = regression_graphs(
	                graph_type, X, y, model, pipeline, y_test, preds
	            )
	        else:
	            metrics = classification_metrics(pipeline, X_test, y_test, preds)
	            fig = classification_graphs(
	                graph_type, pipeline, X_test, y_test, preds
	            )

	        metrics_df = pd.DataFrame(metrics.items(), columns=["Metric", "Value"])
	        return metrics_df, fig
	    except ValueError as e:
	        return _error_result(str(e))