Spaces:
Sleeping
Sleeping
'''
Income Prediction App (Streamlit).

Course material by Dr. Eng. Farrikh Alzami, M.Kom - Universitas Dian Nuswantoro
'''
| import streamlit as st | |
# Page configuration. This has to be the first Streamlit command of the
# script, which is why it is issued before the remaining imports.
_page_settings = {
    "page_title": "Income Prediction App - Materi Dr.Eng. Farrikh Alzami, M.Kom",
    "page_icon": "💰",
    "layout": "wide",
    "initial_sidebar_state": "collapsed",
}
st.set_page_config(**_page_settings)
| import pandas as pd | |
| import numpy as np | |
| import joblib | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| from datetime import datetime | |
| import json | |
# Load model components
@st.cache_resource(show_spinner=False)
def _read_model_components(path):
    """Deserialize the joblib bundle stored at *path*.

    Wrapped in ``st.cache_resource`` so the file is read once per server
    process instead of on every script rerun. The cached object is shared
    between reruns and sessions, so callers must treat it as read-only.
    """
    # NOTE: joblib.load unpickles the file; only point it at a trusted artifact.
    return joblib.load(path)


def load_model():
    """Load the trained model components.

    Returns:
        The object stored in ``./src/income_prediction_components.joblib``
        (the rest of the script indexes it with the keys ``'model'``,
        ``'encoding_maps'`` and ``'feature_names'``).

    If the file is missing or cannot be loaded, an error is shown in the app
    and the script run is halted with ``st.stop()``; the function does not
    return in that case.
    """
    try:
        return _read_model_components('./src/income_prediction_components.joblib')
    except FileNotFoundError:
        st.error("Model file 'income_prediction_components.joblib' not found!")
        st.stop()
    except Exception as e:
        # Top-level boundary: surface any other load failure to the user
        # instead of letting the app crash with a traceback.
        st.error(f"Error loading model: {str(e)}")
        st.stop()
def predict_income(data, model_components):
    """Make an income prediction for one record using the trained model.

    Args:
        data: Either a dict of raw feature values for a single record, or a
            DataFrame (only its first row is scored; the caller's frame is
            not modified).
        model_components: Mapping with the keys ``'model'`` (object exposing
            ``predict`` and ``predict_proba``), ``'encoding_maps'`` (one dict
            per categorical column mapping raw value -> encoded value, plus
            an ``'income'`` entry for the target) and ``'feature_names'``
            (columns, in order, handed to the model).

    Returns:
        dict with:
            ``'prediction'``: encoded class as ``int``;
            ``'prediction_label'``: raw income label looked up through the
                inverted ``'income'`` encoding map;
            ``'probability'``: ``float`` probability of the predicted class;
            ``'probabilities'``: list with the probability of every class.

    Raises:
        ValueError: If a categorical value has no entry in its encoding map.
        KeyError: If a required feature column or component key is missing.
    """
    # Convert to DataFrame if needed
    df = pd.DataFrame([data]) if isinstance(data, dict) else data.copy()

    # Get components
    model = model_components['model']
    encoding_maps = model_components['encoding_maps']
    feature_names = model_components['feature_names']

    # Apply encodings to categorical columns
    for column in df.columns:
        if column not in encoding_maps or column == 'income':
            continue
        raw_values = df[column]
        encoded = raw_values.map(encoding_maps[column])
        # Series.map yields NaN for values absent from the mapping. Reject
        # them explicitly rather than feeding NaN to the model; values that
        # were already missing in the input are left alone.
        unknown = raw_values[encoded.isna() & raw_values.notna()]
        if not unknown.empty:
            raise ValueError(
                f"Unknown value(s) for '{column}': {sorted(set(map(str, unknown)))}"
            )
        df[column] = encoded

    # Ensure we only use features that the model was trained on
    df_for_pred = df[feature_names].copy()

    # Make prediction
    prediction = model.predict(df_for_pred)[0]
    probabilities = model.predict_proba(df_for_pred)[0]

    # predict_proba columns follow model.classes_, so locate the predicted
    # label there instead of assuming labels equal positions 0..n-1. Models
    # without classes_ keep the previous label-as-index behaviour.
    classes = getattr(model, 'classes_', None)
    if classes is None:
        class_index = int(prediction)
    else:
        class_index = list(classes).index(prediction)

    # Get income label
    income_map_inverse = {v: k for k, v in encoding_maps['income'].items()}
    prediction_label = income_map_inverse[prediction]

    return {
        'prediction': int(prediction),
        'prediction_label': prediction_label,
        'probability': float(probabilities[class_index]),
        'probabilities': probabilities.tolist(),
    }
def validate_inputs(data):
    """Check the numeric fields of *data* against their accepted ranges.

    Args:
        data: Mapping holding ``age``, ``education_num``, ``hours_per_week``,
            ``capital_gain``, ``capital_loss`` and ``fnlwgt``.

    Returns:
        A list with one message per out-of-range field, in the order the
        rules are listed below; empty when every field is within bounds
        (bounds are inclusive).
    """
    # (field, lowest allowed, highest allowed, message when violated)
    range_rules = (
        ('age', 17, 90, "Age should be between 17 and 90"),
        ('education_num', 1, 16, "Education number should be between 1 and 16"),
        ('hours_per_week', 1, 99, "Hours per week should be between 1 and 99"),
        ('capital_gain', 0, 99999, "Capital gain should be between 0 and 99999"),
        ('capital_loss', 0, 4356, "Capital loss should be between 0 and 4356"),
        ('fnlwgt', 12285, 1484705, "Final weight should be between 12285 and 1484705"),
    )
    return [
        message
        for field, lowest, highest, message in range_rules
        if data[field] < lowest or data[field] > highest
    ]
def export_prediction(data, result):
    """Serialize one prediction, with its inputs, to a JSON string.

    Args:
        data: The input record; embedded unchanged under ``input_data``.
        result: Mapping providing ``prediction_label``, ``probability`` and
            ``prediction``.

    Returns:
        A JSON document (2-space indent) with the keys ``timestamp`` (local
        time in ISO format), ``input_data`` and ``prediction``.
    """
    created_at = datetime.now().isoformat()
    prediction_section = {
        'class': result['prediction_label'],
        'confidence': result['probability'],
        'raw_prediction': result['prediction'],
    }
    payload = {
        'timestamp': created_at,
        'input_data': data,
        'prediction': prediction_section,
    }
    return json.dumps(payload, indent=2)
def reset_session_state():
    """Remove the stored value of every form input from session state.

    The names below are the ``key`` arguments of the input widgets; keys
    that are not currently present are skipped. Intended to put the inputs
    back to their defaults on the next run.
    """
    input_keys = (
        'age', 'workclass', 'fnlwgt', 'education_num', 'marital_status',
        'occupation', 'relationship', 'race', 'sex', 'capital_gain',
        'capital_loss', 'hours_per_week', 'native_country',
    )
    for name in input_keys:
        if name not in st.session_state:
            continue
        del st.session_state[name]
# Load the model bundle when the script starts. load_model() shows an error
# and stops the script run if the file cannot be loaded, so the code below
# only executes with a loaded bundle.
model_components = load_model()
# Choices offered by the select boxes (taken from the original notebook).
# The selected strings are passed through the model's encoding maps, so the
# spellings (e.g. 'Columbia', 'Trinadad&Tobago', 'Holand-Netherlands') must
# stay exactly as written here.
workclass_options = [
    'State-gov', 'Self-emp-not-inc', 'Private', 'Federal-gov',
    'Local-gov', 'Self-emp-inc', 'Without-pay', 'Never-worked',
]
marital_status_options = [
    'Never-married', 'Married-civ-spouse', 'Divorced', 'Married-spouse-absent',
    'Separated', 'Married-AF-spouse', 'Widowed',
]
occupation_options = [
    'Adm-clerical', 'Exec-managerial', 'Handlers-cleaners', 'Prof-specialty',
    'Other-service', 'Sales', 'Craft-repair', 'Transport-moving',
    'Farming-fishing', 'Machine-op-inspct', 'Tech-support', 'Protective-serv',
    'Armed-Forces', 'Priv-house-serv',
]
relationship_options = [
    'Not-in-family', 'Husband', 'Wife',
    'Own-child', 'Unmarried', 'Other-relative',
]
race_options = [
    'White', 'Black', 'Asian-Pac-Islander', 'Amer-Indian-Eskimo', 'Other',
]
sex_options = ['Male', 'Female']
native_country_options = [
    'United-States', 'Cuba', 'Jamaica', 'India', 'Mexico',
    'South', 'Puerto-Rico', 'Honduras', 'England', 'Canada',
    'Germany', 'Iran', 'Philippines', 'Italy', 'Poland',
    'Columbia', 'Cambodia', 'Thailand', 'Ecuador', 'Laos',
    'Taiwan', 'Haiti', 'Portugal', 'Dominican-Republic', 'El-Salvador',
    'France', 'Guatemala', 'China', 'Japan', 'Yugoslavia',
    'Peru', 'Outlying-US(Guam-USVI-etc)', 'Scotland', 'Trinadad&Tobago', 'Greece',
    'Nicaragua', 'Vietnam', 'Hong', 'Ireland', 'Hungary',
    'Holand-Netherlands',
]
# Main app: page heading
st.title("💰 Income Prediction App - Dr. Eng. Farrikh Alzami, M.Kom")
st.markdown("Predict whether income exceeds $50K/year based on demographic data")

# Two-column layout: the wider left column holds the input form, the right
# column is where the prediction handler renders its output.
col1, col2 = st.columns([2, 1])

with col1:
    st.subheader("📝 Input Features")

    # All inputs live in one form, so their values are only sent when one of
    # the form's submit buttons is pressed.
    with st.form("prediction_form"):
        # --- Demographic Information ---
        st.markdown("**Demographic Information**")
        col_demo1, col_demo2 = st.columns(2)
        with col_demo1:
            age = st.number_input(
                "Age",
                min_value=17,
                max_value=90,
                value=39,
                key="age",
            )
            sex = st.selectbox("Sex", sex_options, key="sex")
            race = st.selectbox("Race", race_options, key="race")
        with col_demo2:
            marital_status = st.selectbox(
                "Marital Status",
                marital_status_options,
                key="marital_status",
            )
            relationship = st.selectbox(
                "Relationship",
                relationship_options,
                key="relationship",
            )
            native_country = st.selectbox(
                "Native Country",
                native_country_options,
                key="native_country",
            )
        st.divider()

        # --- Work Information ---
        st.markdown("**Work Information**")
        col_work1, col_work2 = st.columns(2)
        with col_work1:
            workclass = st.selectbox(
                "Work Class",
                workclass_options,
                key="workclass",
            )
            occupation = st.selectbox(
                "Occupation",
                occupation_options,
                key="occupation",
            )
            hours_per_week = st.number_input(
                "Hours per Week",
                min_value=1,
                max_value=99,
                value=40,
                key="hours_per_week",
            )
        with col_work2:
            education_num = st.number_input(
                "Education Level (Years)",
                min_value=1,
                max_value=16,
                value=10,
                key="education_num",
            )
            fnlwgt = st.number_input(
                "Final Weight",
                min_value=12285,
                max_value=1484705,
                value=77516,
                key="fnlwgt",
            )
        st.divider()

        # --- Financial Information ---
        st.markdown("**Financial Information**")
        col_fin1, col_fin2 = st.columns(2)
        with col_fin1:
            capital_gain = st.number_input(
                "Capital Gain",
                min_value=0,
                max_value=99999,
                value=0,
                key="capital_gain",
            )
        with col_fin2:
            capital_loss = st.number_input(
                "Capital Loss",
                min_value=0,
                max_value=4356,
                value=0,
                key="capital_loss",
            )

        # --- Submit buttons, side by side ---
        col_btn1, col_btn2, col_btn3 = st.columns(3)
        with col_btn1:
            predict_button = st.form_submit_button("🔮 Predict", type="primary")
        with col_btn2:
            reset_button = st.form_submit_button("🔄 Reset")
        with col_btn3:
            export_button = st.form_submit_button("📤 Export Last Result")

# Reset: drop the stored widget values, then rerun the script right away.
if reset_button:
    reset_session_state()
    st.rerun()
# Handle prediction: runs only on the rerun triggered by the Predict button.
if predict_button:
    # Gather the submitted widget values under the feature names used by
    # validate_inputs() and predict_income().
    input_data = dict(
        age=age,
        workclass=workclass,
        fnlwgt=fnlwgt,
        education_num=education_num,
        marital_status=marital_status,
        occupation=occupation,
        relationship=relationship,
        race=race,
        sex=sex,
        capital_gain=capital_gain,
        capital_loss=capital_loss,
        hours_per_week=hours_per_week,
        native_country=native_country,
    )

    validation_errors = validate_inputs(input_data)
    if not validation_errors:
        try:
            result = predict_income(input_data, model_components)

            # Remember the latest prediction so the export handler can use it.
            st.session_state['last_prediction'] = {
                'input_data': input_data,
                'result': result,
            }

            with col2:
                st.subheader("🎯 Prediction Results")

                # Predicted label, green for '>50K' and orange otherwise.
                if result['prediction_label'] == '>50K':
                    prediction_color = "green"
                else:
                    prediction_color = "orange"
                st.markdown(f"**Predicted Income:** :{prediction_color}[{result['prediction_label']}]")

                # Gauge showing the probability of the predicted class in percent.
                confidence = result['probability'] * 100
                gauge_steps = [
                    {'range': [0, 50], 'color': "lightgray"},
                    {'range': [50, 80], 'color': "yellow"},
                    {'range': [80, 100], 'color': "lightgreen"},
                ]
                gauge_threshold = {
                    'line': {'color': "red", 'width': 4},
                    'thickness': 0.75,
                    'value': 90,
                }
                gauge_settings = {
                    'axis': {'range': [None, 100]},
                    'bar': {'color': prediction_color},
                    'steps': gauge_steps,
                    'threshold': gauge_threshold,
                }
                confidence_indicator = go.Indicator(
                    mode="gauge+number+delta",
                    value=confidence,
                    domain={'x': [0, 1], 'y': [0, 1]},
                    title={'text': "Confidence Level (%)"},
                    gauge=gauge_settings,
                )
                fig_gauge = go.Figure(confidence_indicator)
                fig_gauge.update_layout(height=300, margin=dict(l=20, r=20, t=40, b=20))
                st.plotly_chart(fig_gauge, use_container_width=True)

                # Bar chart with the probability of each class.
                # NOTE(review): the labels assume two classes ordered
                # ['≤50K', '>50K'] in result['probabilities'] - confirm
                # against the trained model.
                prob_df = pd.DataFrame({
                    'Class': ['≤50K', '>50K'],
                    'Probability': result['probabilities'],
                })
                fig_bar = px.bar(
                    prob_df,
                    x='Class',
                    y='Probability',
                    title='Probability Distribution',
                    color='Probability',
                    color_continuous_scale=['orange', 'green'],
                )
                fig_bar.update_layout(height=300, margin=dict(l=20, r=20, t=40, b=20))
                st.plotly_chart(fig_bar, use_container_width=True)
        except Exception as e:
            # UI boundary: report any failure from prediction or rendering
            # in the results column instead of crashing the page.
            with col2:
                st.error(f"❌ Prediction Error: {str(e)}")
    else:
        # List every validation problem in the results column.
        with col2:
            st.error("❌ Validation Errors:")
            for error in validation_errors:
                st.error(f"• {error}")
# Feature Importance section
# NOTE(review): written as a page-level section that renders on every run;
# the original nesting could not be recovered from the source - confirm it
# is not meant to appear only after a prediction.
st.subheader("📊 Feature Importance")
if 'model' in model_components:
    try:
        feature_names = model_components['feature_names']
        feature_importance = model_components['model'].feature_importances_

        # One row per feature, sorted by ascending importance for the
        # horizontal bar chart below.
        importance_table = pd.DataFrame({
            'Feature': feature_names,
            'Importance': feature_importance,
        })
        importance_df = importance_table.sort_values('Importance', ascending=True)

        chart_options = {
            'x': 'Importance',
            'y': 'Feature',
            'orientation': 'h',
            'title': 'Feature Importance in Decision Tree Model',
            'color': 'Importance',
            'color_continuous_scale': 'viridis',
        }
        fig_importance = px.bar(importance_df, **chart_options)
        fig_importance.update_layout(height=400, margin=dict(l=20, r=20, t=40, b=20))
        st.plotly_chart(fig_importance, use_container_width=True)
    except Exception as e:
        # E.g. the stored model has no feature_importances_ attribute.
        st.error(f"Error displaying feature importance: {str(e)}")
# Handle export: offer the most recent prediction as a JSON download.
if export_button:
    if 'last_prediction' not in st.session_state:
        st.warning("⚠️ No prediction results to export. Please make a prediction first.")
    else:
        export_data = export_prediction(
            st.session_state['last_prediction']['input_data'],
            st.session_state['last_prediction']['result'],
        )
        # File name carries the time of the export request.
        download_name = f"income_prediction_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        st.download_button(
            label="📥 Download Prediction Results",
            data=export_data,
            file_name=download_name,
            mime="application/json",
        )

# Footer: horizontal rule followed by the credit line.
st.markdown("---")
st.markdown("*Built with Streamlit • Dr. Eng. Farrikh Alzami, M.Kom*")