# Yatheshr's picture
# Update app.py
# bd9a6cf verified
import pandas as pd
import gradio as gr
from catboost import CatBoostClassifier, Pool
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import accuracy_score
import joblib
# Toy Morningstar-style dataset, written as one record per row.
_COLUMNS = (
    'fund_rating',
    'fund_category',
    'risk_appetite',
    'expense_ratio',
    'user_age',
    'investment_goal_years',
    'will_invest',
)
_ROWS = [
    (5, 'Equity', 'High', 0.5, 35, 5, 1),
    (4, 'Debt', 'Medium', 1.2, 45, 10, 1),
    (3, 'Balanced', 'Low', 1.0, 29, 3, 0),
    (5, 'Equity', 'High', 0.4, 50, 15, 1),
    (2, 'Debt', 'Low', 1.5, 60, 2, 0),
    (4, 'Equity', 'Medium', 1.1, 40, 7, 1),
    (1, 'Balanced', 'Low', 1.3, 30, 4, 0),
    (3, 'Debt', 'Medium', 0.8, 28, 5, 0),
    (5, 'Equity', 'High', 0.3, 55, 20, 1),
    (2, 'Debt', 'Low', 1.4, 62, 1, 0),
]
# Transpose the records into a column-name -> list-of-values mapping.
data = {name: list(values) for name, values in zip(_COLUMNS, zip(*_ROWS))}

# Tabular view; the target is 'will_invest', everything else is a feature.
df = pd.DataFrame(data)
y = df['will_invest']
X = df.drop(columns='will_invest')
cat_features = ['fund_category', 'risk_appetite']

# Hold out 20% of the rows for evaluation (seeded so the split is repeatable).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
train_pool = Pool(X_train, y_train, cat_features=cat_features)

# Baseline model with default hyperparameters, trained silently.
model = CatBoostClassifier(verbose=0)
model.fit(train_pool)

# Persist the baseline so the prediction handler has a model to load.
joblib.dump(model, "catboost_model.pkl")
# Prediction function
def predict(fund_rating, fund_category, risk_appetite, expense_ratio, user_age, investment_goal_years):
    """Predict the ``will_invest`` label for a single fund/user combination.

    Args:
        fund_rating: Numeric fund rating.
        fund_category: Fund category string (categorical feature).
        risk_appetite: Risk appetite string (categorical feature).
        expense_ratio: Numeric expense ratio.
        user_age: Numeric user age.
        investment_goal_years: Numeric investment horizon in years.

    Returns:
        The predicted class label converted to ``int``.

    Raises:
        gr.Error: If either categorical input is ``None`` (nothing selected).
    """
    # A dropdown with no selection delivers None. Passing that through as a
    # categorical value makes the model call fail with an opaque error, so
    # surface a readable message in the UI instead.
    if fund_category is None or risk_appetite is None:
        raise gr.Error("Please select both a Fund Category and a Risk Appetite.")

    # Single-row frame with the same column order as the training features.
    input_data = pd.DataFrame(
        [[fund_rating, fund_category, risk_appetite, expense_ratio, user_age, investment_goal_years]],
        columns=X.columns,
    )
    # Load from disk on every call so a model saved later under the same
    # file name is picked up without restarting the app.
    loaded_model = joblib.load("catboost_model.pkl")
    return int(loaded_model.predict(input_data)[0])
# Tuning function
def tune_model():
    """Search CatBoost hyperparameters and persist the best model found.

    Runs a randomized search (10 sampled candidates, 3-fold cross-validation,
    accuracy scoring) on the training split, saves the winning model to
    ``catboost_model.pkl`` (overwriting the previous file), and scores it on
    the held-out test split.

    Returns:
        A status string containing the test accuracy and the best parameters.
    """
    param_grid = {
        'depth': [4, 6, 8],
        'learning_rate': [0.01, 0.05, 0.1],
        'l2_leaf_reg': [1, 3, 5, 7],
        'n_estimators': [100, 200, 500],
        'bagging_temperature': [0, 0.5, 1],
    }
    search = RandomizedSearchCV(
        estimator=CatBoostClassifier(verbose=0),
        param_distributions=param_grid,
        scoring='accuracy',
        cv=3,
        n_iter=10,
        random_state=42,
    )
    search.fit(X_train, y_train, cat_features=cat_features)
    best_params = search.best_params_

    # With the default refit=True the search has already retrained the best
    # configuration on the full training split, so reuse that estimator
    # instead of fitting an identical model a second time.
    tuned_model = search.best_estimator_

    # Save tuned model
    joblib.dump(tuned_model, "catboost_model.pkl")

    y_pred = tuned_model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    return f"Tuned! Accuracy: {acc:.2f}\nBest Params: {best_params}"
# Gradio Interface with Blocks
# Layout: two rows of feature inputs, a Predict button with a label output,
# and a Tune Model button with a textbox for the tuning report.
with gr.Blocks() as full_ui:
    gr.Markdown("## Mutual Fund Investment Classifier (CatBoost + Gradio)")
    with gr.Row():
        # step=1 keeps whole-number features whole; without an explicit step
        # the slider is not guaranteed to emit integers.
        fund_rating = gr.Slider(1, 5, step=1, label="Fund Rating")
        # Explicit initial values so the categorical inputs are never
        # submitted unselected.
        fund_category = gr.Dropdown(
            choices=['Equity', 'Debt', 'Balanced'],
            value='Equity',
            label="Fund Category",
        )
        risk_appetite = gr.Dropdown(
            choices=['High', 'Medium', 'Low'],
            value='High',
            label="Risk Appetite",
        )
    with gr.Row():
        expense_ratio = gr.Slider(0.1, 2.0, step=0.1, label="Expense Ratio")
        user_age = gr.Slider(18, 80, step=1, label="User Age")
        investment_goal_years = gr.Slider(1, 30, step=1, label="Investment Goal (Years)")
    predict_btn = gr.Button("Predict")
    output = gr.Label()
    # Input order must match the positional parameters of predict().
    predict_btn.click(
        fn=predict,
        inputs=[fund_rating, fund_category, risk_appetite, expense_ratio, user_age, investment_goal_years],
        outputs=output,
    )
    tune_btn = gr.Button("Tune Model")
    tune_out = gr.Textbox(label="Tuning Output")
    tune_btn.click(fn=tune_model, inputs=[], outputs=tune_out)
full_ui.launch()