Spaces:

3nthusiast
/

Insurance-Claim-Predict

Sleeping

Insurance-Claim-Predict / models /svr.py

Mateusz Paszynski

publish website

5de1466 about 1 year ago

5.78 kB

	import pandas as pd
	import numpy as np

	from sklearn.svm import NuSVR
	from sklearn.compose import ColumnTransformer
	from sklearn.preprocessing import OneHotEncoder, StandardScaler, OrdinalEncoder, MinMaxScaler
	from sklearn.pipeline import Pipeline
	from sklearn.base import BaseEstimator, TransformerMixin
	from sklearn.model_selection import train_test_split
	from sklearn.metrics import mean_absolute_error, r2_score

	import joblib


	class NuSVRInsuranceModel:
	    """
	    NuSVR-based regressor for the insurance 'claim' target.

	    This class encapsulates:
	      1. Preprocessing: column transformations, scaling
	      2. Prediction: using NuSVR
	      3. Postprocessing: inverse-transform predictions to original scale

	    The constructor only *builds* the components. ``ct``, ``target_scaler``
	    and ``model`` are created unfitted and must be fitted by the caller
	    before ``preprocessing``, ``predict`` or ``postprocessing`` are used.

	    Attributes:
	        ct: ColumnTransformer turning the raw feature columns into the
	            model's input matrix.
	        target_scaler: MinMaxScaler mapping the target to [-0.5, 0.5].
	        model: the NuSVR regressor, trained on the scaled target.
	    """

	    # --- Custom Transformer defined INSIDE the class ---
	    class MultiplyScaler(BaseEstimator, TransformerMixin):
	        """Stateless transformer that multiplies its input by a constant."""

	        def __init__(self, factor=2):
	            # Stored under the parameter's own name, as scikit-learn's
	            # get_params()/clone() machinery expects.
	            self.factor = factor

	        def fit(self, X, y=None):
	            """Learn nothing; return ``self`` so the step can sit in a Pipeline."""
	            return self

	        def transform(self, X):
	            """Return ``X * factor``."""
	            return X * self.factor

	    def __init__(self):
	        """
	        Define the column pipelines, the main ColumnTransformer,
	        the target scaler, and the model. Nothing is fitted here.
	        """
	        # Categorical text columns -> one-hot indicator columns.
	        text_pipeline = Pipeline([
	            ('one-hot', OneHotEncoder()),
	        ])

	        # Plain standardisation (zero mean, unit variance).
	        nums_pipeline = Pipeline([
	            ('normalize', StandardScaler(with_mean=True)),
	        ])

	        # Standardisation followed by a x2 scale-up.
	        # NOTE(review): presumably meant to give these columns more weight
	        # in the RBF kernel's distance computation -- confirm with the author.
	        nums_pipeline_strong = Pipeline([
	            ('normalize', StandardScaler(with_mean=True)),
	            # Note we reference the nested class here
	            ('scalarMultiply', NuSVRInsuranceModel.MultiplyScaler(factor=2)),
	        ])

	        # One-hot, variance scaling without centring (with_mean=False),
	        # then a x5 scale-up.
	        smoke_pipeline = Pipeline([
	            ('one-hot', OneHotEncoder()),
	            ('normalize', StandardScaler(with_mean=False)),
	            ('scalar-multiply', NuSVRInsuranceModel.MultiplyScaler(factor=5)),
	        ])

	        # Create ColumnTransformer
	        # Adjust columns to match your dataset's actual column names.
	        # NOTE(review): columns not listed here are not passed to the model
	        # (ColumnTransformer drops the remainder by default) -- confirm that
	        # leaving out any other dataset columns is intentional.
	        self.ct = ColumnTransformer([
	            ('str_handler', text_pipeline, ['diabetic', 'gender']),
	            ('smoke_handle', smoke_pipeline, ['smoker']),
	            ('floats_ints_weak', nums_pipeline, ['children', 'age']),
	            ('floats_ints_strong', nums_pipeline_strong, ['bmi', 'bloodpressure']),
	        ])

	        # Target scaler (for the 'claim' column)
	        self.target_scaler = MinMaxScaler(feature_range=(-0.5, 0.5))

	        # NuSVR model with desired hyperparameters
	        self.model = NuSVR(C=10, gamma='scale', kernel='rbf', nu=0.80)

	    def preprocessing(self, df):
	        """
	        Apply the fitted ColumnTransformer to a raw dataframe.

	        Args:
	            df: dataframe containing the columns named in ``self.ct``.

	        Returns:
	            The transformed feature matrix produced by ``self.ct.transform``.
	        """
	        return self.ct.transform(df)

	    def predict(self, preprocessed_data):
	        """
	        Predict from already-preprocessed data.

	        Args:
	            preprocessed_data: matrix/array as returned by ``preprocessing``.

	        Returns:
	            1-D array of predictions in the original target scale.
	        """
	        y_pred_scaled = self.model.predict(preprocessed_data)
	        return self.postprocessing(y_pred_scaled)

	    def postprocessing(self, y_pred_scaled):
	        """
	        Map scaled predictions back to the original target domain.

	        Args:
	            y_pred_scaled: predictions in the ``target_scaler`` domain. Any
	                array-like is accepted (ndarray, list, pandas Series, scalar).

	        Returns:
	            1-D numpy array of predictions in the original target scale.
	        """
	        # np.asarray makes .reshape available for non-ndarray inputs too;
	        # the scaler expects a single column, hence the (-1, 1) shape.
	        scaled_column = np.asarray(y_pred_scaled).reshape(-1, 1)
	        y_pred_original = self.target_scaler.inverse_transform(scaled_column)
	        return y_pred_original.ravel()


	if __name__ == "__main__":
	    # Training entry point: load the cleaned dataset, fit every component of
	    # the wrapper on a training split, report hold-out metrics, and persist
	    # the fitted wrapper with joblib.

	    # --- 1. Load the data and split it into features / target ------------
	    raw_frame = pd.read_csv('cleaned_insurance_data.csv')
	    feature_frame = raw_frame.drop(columns=['claim', 'PatientID', 'index'])
	    claim_series = raw_frame['claim']

	    # --- 2. Build the (still unfitted) wrapper ----------------------------
	    wrapper = NuSVRInsuranceModel()

	    # --- 3. Hold out 25% of the rows; fixed seed keeps the split stable ---
	    X_fit_raw, X_eval_raw, y_fit, y_eval = train_test_split(
	        feature_frame, claim_series, test_size=0.25, random_state=42
	    )

	    # --- 4. Fit the transformers on the TRAIN rows only -------------------
	    X_fit_t = wrapper.ct.fit_transform(X_fit_raw)
	    # The scaler expects a 2-D column; flatten again for the regressor.
	    y_fit_column = y_fit.values.reshape(-1, 1)
	    y_fit_t = wrapper.target_scaler.fit_transform(y_fit_column).ravel()

	    # --- 5. Train the NuSVR model on the scaled target --------------------
	    wrapper.model.fit(X_fit_t, y_fit_t)

	    # --- 6. Evaluate on the held-out rows ----------------------------------
	    # Same fitted pipeline as training; predict() already returns values in
	    # the original target scale.
	    X_eval_t = wrapper.preprocessing(X_eval_raw)
	    eval_pred = wrapper.predict(X_eval_t)

	    mae = mean_absolute_error(y_eval, eval_pred)
	    r2 = r2_score(y_eval, eval_pred)
	    print(f"Test MAE (original scale): {mae:.3f}")
	    print(f"Test R^2 (original scale): {r2:.3f}")

	    # --- 7. Export the fitted wrapper ---------------------------------------
	    joblib.dump(wrapper, "nusvr_insurance_model.joblib")
	    print("Fitted NuSVRInsuranceModel saved to nusvr_insurance_model.joblib")