Spaces:

Yatheshr
/

catboost_gradient_traning

Build error

App Files Files Community

catboost_gradient_traning / app.py

Yatheshr

Update app.py

bd9a6cf verified 11 months ago

raw

history blame contribute delete

3.76 kB

	import pandas as pd
	import gradio as gr
	from catboost import CatBoostClassifier, Pool
	from sklearn.model_selection import train_test_split, RandomizedSearchCV
	from sklearn.metrics import accuracy_score
	import joblib

	# Small in-memory sample shaped like Morningstar fund records.
	# Each key is a column; `will_invest` is the binary target.
	data = {
	    'fund_rating': [5, 4, 3, 5, 2, 4, 1, 3, 5, 2],
	    'fund_category': [
	        'Equity', 'Debt', 'Balanced', 'Equity', 'Debt',
	        'Equity', 'Balanced', 'Debt', 'Equity', 'Debt',
	    ],
	    'risk_appetite': [
	        'High', 'Medium', 'Low', 'High', 'Low',
	        'Medium', 'Low', 'Medium', 'High', 'Low',
	    ],
	    'expense_ratio': [0.5, 1.2, 1.0, 0.4, 1.5, 1.1, 1.3, 0.8, 0.3, 1.4],
	    'user_age': [35, 45, 29, 50, 60, 40, 30, 28, 55, 62],
	    'investment_goal_years': [5, 10, 3, 15, 2, 7, 4, 5, 20, 1],
	    'will_invest': [1, 1, 0, 1, 0, 1, 0, 0, 1, 0],
	}

	# Tabular form: features in X, target in y.
	df = pd.DataFrame(data)
	X = df.drop(columns='will_invest')
	y = df['will_invest']

	# Columns CatBoost must treat as categorical rather than numeric.
	cat_features = ['fund_category', 'risk_appetite']

	# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
	X_train, X_test, y_train, y_test = train_test_split(
	    X,
	    y,
	    test_size=0.2,
	    random_state=42,
	)
	train_pool = Pool(data=X_train, label=y_train, cat_features=cat_features)

	# Baseline classifier with library defaults (training log silenced).
	model = CatBoostClassifier(verbose=0)
	model.fit(train_pool)

	# Persist the baseline so predict() has a model file to load.
	joblib.dump(model, "catboost_model.pkl")

	# Prediction function
	def predict(fund_rating, fund_category, risk_appetite, expense_ratio, user_age, investment_goal_years):
	    """Predict the ``will_invest`` class for a single set of inputs.

	    The six arguments are placed, in this order, into a one-row DataFrame
	    whose column labels are ``X.columns``. The model is read from
	    ``catboost_model.pkl`` on every call, so whichever model was most
	    recently written to that file is the one used.

	    Returns:
	        The predicted class for the row, converted to ``int``.

	    Raises:
	        gr.Error: if ``fund_category`` or ``risk_appetite`` is empty/None.
	    """
	    # A dropdown with no selection may deliver None (depends on the Gradio
	    # version's default); fail with a readable message instead of letting
	    # the model choke on a missing categorical value.
	    if not fund_category or not risk_appetite:
	        raise gr.Error("Please select both a fund category and a risk appetite.")

	    input_data = pd.DataFrame(
	        [[fund_rating, fund_category, risk_appetite, expense_ratio, user_age, investment_goal_years]],
	        columns=X.columns,
	    )
	    loaded_model = joblib.load("catboost_model.pkl")
	    return int(loaded_model.predict(input_data)[0])

	# Tuning function
	def tune_model():
	    """Run a randomized hyper-parameter search and save the best model.

	    Samples 10 parameter combinations from ``param_grid``, scores each with
	    3-fold cross-validated accuracy on ``X_train``/``y_train``, writes the
	    winning model to ``catboost_model.pkl`` (overwriting the previous file)
	    and evaluates it on ``X_test``/``y_test``.

	    Returns:
	        A two-line summary string with the hold-out accuracy and the best
	        parameter combination.
	    """
	    param_grid = {
	        'depth': [4, 6, 8],
	        'learning_rate': [0.01, 0.05, 0.1],
	        'l2_leaf_reg': [1, 3, 5, 7],
	        'n_estimators': [100, 200, 500],
	        'bagging_temperature': [0, 0.5, 1],
	    }
	    search = RandomizedSearchCV(
	        estimator=CatBoostClassifier(verbose=0),
	        param_distributions=param_grid,
	        scoring='accuracy',
	        cv=3,
	        n_iter=10,
	        random_state=42,
	        # Explicit because best_estimator_ below only exists when refit is on.
	        refit=True,
	    )
	    # cat_features is forwarded to every CatBoostClassifier.fit call.
	    search.fit(X_train, y_train, cat_features=cat_features)
	    best_params = search.best_params_

	    # The search has already refit a classifier with the best parameters on
	    # the full training split, so reuse it instead of training an identical
	    # model a second time.
	    tuned_model = search.best_estimator_

	    # Save tuned model
	    joblib.dump(tuned_model, "catboost_model.pkl")

	    y_pred = tuned_model.predict(X_test)
	    acc = accuracy_score(y_test, y_pred)
	    return f"Tuned! Accuracy: {acc:.2f}\nBest Params: {best_params}"

	# Gradio Blocks layout: six feature inputs, a predict action and a tuning action.
	with gr.Blocks() as full_ui:
	    gr.Markdown(value="## Mutual Fund Investment Classifier (CatBoost + Gradio)")

	    # Row 1: fund rating and the two categorical features.
	    with gr.Row():
	        fund_rating = gr.Slider(minimum=1, maximum=5, label="Fund Rating")
	        fund_category = gr.Dropdown(
	            choices=['Equity', 'Debt', 'Balanced'],
	            label="Fund Category",
	        )
	        risk_appetite = gr.Dropdown(
	            choices=['High', 'Medium', 'Low'],
	            label="Risk Appetite",
	        )

	    # Row 2: the remaining numeric features.
	    with gr.Row():
	        expense_ratio = gr.Slider(minimum=0.1, maximum=2.0, step=0.1, label="Expense Ratio")
	        user_age = gr.Slider(minimum=18, maximum=80, label="User Age")
	        investment_goal_years = gr.Slider(minimum=1, maximum=30, label="Investment Goal (Years)")

	    # Prediction: inputs are listed in the same order as predict()'s parameters.
	    predict_btn = gr.Button(value="Predict")
	    output = gr.Label()
	    predict_btn.click(
	        fn=predict,
	        inputs=[
	            fund_rating,
	            fund_category,
	            risk_appetite,
	            expense_ratio,
	            user_age,
	            investment_goal_years,
	        ],
	        outputs=output,
	    )

	    # Tuning: takes no inputs and reports its summary text in a textbox.
	    tune_btn = gr.Button(value="Tune Model")
	    tune_out = gr.Textbox(label="Tuning Output")
	    tune_btn.click(fn=tune_model, inputs=[], outputs=tune_out)

	# Start the web server for the assembled interface.
	full_ui.launch()