Spaces:
Build error
Build error
| import pandas as pd | |
| import gradio as gr | |
| from catboost import CatBoostClassifier, Pool | |
| from sklearn.model_selection import train_test_split, RandomizedSearchCV | |
| from sklearn.metrics import accuracy_score | |
| import joblib | |
# Small hand-written sample in the shape of Morningstar fund data.
# `will_invest` is the binary label; every other key is a feature.
data = {
    'fund_rating': [5, 4, 3, 5, 2, 4, 1, 3, 5, 2],
    'fund_category': [
        'Equity', 'Debt', 'Balanced', 'Equity', 'Debt',
        'Equity', 'Balanced', 'Debt', 'Equity', 'Debt',
    ],
    'risk_appetite': [
        'High', 'Medium', 'Low', 'High', 'Low',
        'Medium', 'Low', 'Medium', 'High', 'Low',
    ],
    'expense_ratio': [0.5, 1.2, 1.0, 0.4, 1.5, 1.1, 1.3, 0.8, 0.3, 1.4],
    'user_age': [35, 45, 29, 50, 60, 40, 30, 28, 55, 62],
    'investment_goal_years': [5, 10, 3, 15, 2, 7, 4, 5, 20, 1],
    'will_invest': [1, 1, 0, 1, 0, 1, 0, 0, 1, 0],
}

df = pd.DataFrame(data)

# Separate the label column from the feature columns.
X = df.drop(columns=['will_invest'])
y = df['will_invest']

# Feature columns that hold string categories rather than numbers.
cat_features = ['fund_category', 'risk_appetite']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
train_pool = Pool(data=X_train, label=y_train, cat_features=cat_features)

# Baseline classifier trained with CatBoost's default hyperparameters.
model = CatBoostClassifier(verbose=0)
model.fit(train_pool)

# Write the baseline to disk; predict() loads the model from this file.
joblib.dump(model, "catboost_model.pkl")
# Prediction function
def predict(fund_rating, fund_category, risk_appetite, expense_ratio, user_age, investment_goal_years):
    """Predict whether a user will invest in a fund.

    The six arguments are assembled into a one-row DataFrame whose columns
    follow the order of the training features (``X.columns``), and the model
    stored in ``catboost_model.pkl`` is asked for its class prediction.

    Args:
        fund_rating: Numeric fund rating.
        fund_category: Fund category label (categorical feature).
        risk_appetite: Risk appetite label (categorical feature).
        expense_ratio: Fund expense ratio.
        user_age: Age of the user.
        investment_goal_years: Investment horizon in years.

    Returns:
        The predicted class as an ``int`` (the training labels are 0 and 1).

    Raises:
        gr.Error: If either categorical input is ``None`` (nothing selected
            in the corresponding dropdown).
    """
    # A dropdown with no selection yields None; reject it here with a
    # readable message instead of letting the model fail on a missing
    # categorical value.
    if fund_category is None or risk_appetite is None:
        raise gr.Error("Please select both a Fund Category and a Risk Appetite.")

    input_data = pd.DataFrame(
        [[fund_rating, fund_category, risk_appetite, expense_ratio, user_age, investment_goal_years]],
        columns=X.columns,
    )
    # Load from disk on every call so that a model saved by tune_model()
    # is picked up without restarting the app.
    loaded_model = joblib.load("catboost_model.pkl")
    return int(loaded_model.predict(input_data)[0])
# Tuning function
def tune_model():
    """Run a randomized hyperparameter search and save the best model.

    Ten parameter combinations are sampled from ``param_grid`` and scored by
    3-fold cross-validated accuracy on the training split. The winning
    estimator overwrites ``catboost_model.pkl`` and is then evaluated on the
    held-out test split.

    Returns:
        A two-line status string containing the test accuracy (two decimal
        places) and the best parameter dictionary.
    """
    param_grid = {
        'depth': [4, 6, 8],
        'learning_rate': [0.01, 0.05, 0.1],
        'l2_leaf_reg': [1, 3, 5, 7],
        'n_estimators': [100, 200, 500],
        'bagging_temperature': [0, 0.5, 1],
    }
    search = RandomizedSearchCV(
        estimator=CatBoostClassifier(verbose=0),
        param_distributions=param_grid,
        scoring='accuracy',
        cv=3,
        n_iter=10,
        random_state=42,
        refit=True,  # the default, spelled out because the code below relies on it
    )
    # Extra keyword arguments to fit() are forwarded to the estimator's fit().
    search.fit(X_train, y_train, cat_features=cat_features)
    best_params = search.best_params_

    # With refit=True the search has already retrained the best configuration
    # on the full training split, so reuse that estimator instead of fitting
    # an identical model a second time.
    tuned_model = search.best_estimator_

    # Save tuned model
    joblib.dump(tuned_model, "catboost_model.pkl")

    y_pred = tuned_model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    return f"Tuned! Accuracy: {acc:.2f}\nBest Params: {best_params}"
# Gradio Interface with Blocks
# Layout: a title, two rows of feature inputs, a Predict button with its
# label output, then a Tune Model button with a textbox for the tuning report.
with gr.Blocks() as full_ui:
    gr.Markdown("## Mutual Fund Investment Classifier (CatBoost + Gradio)")

    with gr.Row():
        # step=1 keeps whole-number features whole; the training data holds
        # integer ratings, ages and horizons.
        fund_rating = gr.Slider(1, 5, step=1, label="Fund Rating")
        # Explicit defaults so the categorical inputs always start with a
        # valid selection.
        fund_category = gr.Dropdown(
            choices=['Equity', 'Debt', 'Balanced'],
            value='Equity',
            label="Fund Category",
        )
        risk_appetite = gr.Dropdown(
            choices=['High', 'Medium', 'Low'],
            value='High',
            label="Risk Appetite",
        )

    with gr.Row():
        expense_ratio = gr.Slider(0.1, 2.0, step=0.1, label="Expense Ratio")
        user_age = gr.Slider(18, 80, step=1, label="User Age")
        investment_goal_years = gr.Slider(1, 30, step=1, label="Investment Goal (Years)")

    predict_btn = gr.Button("Predict")
    output = gr.Label()
    # Input order must match the parameter order of predict().
    predict_btn.click(
        fn=predict,
        inputs=[fund_rating, fund_category, risk_appetite, expense_ratio, user_age, investment_goal_years],
        outputs=output,
    )

    tune_btn = gr.Button("Tune Model")
    tune_out = gr.Textbox(label="Tuning Output")
    tune_btn.click(fn=tune_model, inputs=[], outputs=tune_out)

full_ui.launch()