Spaces:

alzami
/

ti_data_mining

Sleeping

App Files Files Community

alzami committed on Dec 11, 2025

Commit

8dfedb7

verified ·

1 Parent(s): dd3075d

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +358 -38

src/streamlit_app.py CHANGED Viewed

@@ -1,40 +1,360 @@
-import altair as alt
-import numpy as np
-import pandas as pd
 import streamlit as st
-"""
-# Welcome to Streamlit!
-Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
-In the meantime, below is an example of what you can do with just a few lines of code:
-"""
-num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
-num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-indices = np.linspace(0, 1, num_points)
-theta = 2 * np.pi * num_turns * indices
-radius = indices
-x = radius * np.cos(theta)
-y = radius * np.sin(theta)
-df = pd.DataFrame({
-    "x": x,
-    "y": y,
-    "idx": indices,
-    "rand": np.random.randn(num_points),
-})
-st.altair_chart(alt.Chart(df, height=700, width=700)
-    .mark_point(filled=True)
-    .encode(
-        x=alt.X("x", axis=None),
-        y=alt.Y("y", axis=None),
-        color=alt.Color("idx", legend=None, scale=alt.Scale()),
-        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-    ))

+'''
+Materi Dr. Eng. Farrikh Alzami, M.Kom - Universitas Dian Nuswantoro
+'''
 import streamlit as st
+# Page configuration - MUST be first Streamlit command
+st.set_page_config(
+    page_title="Income Prediction App - Materi Dr.Eng. Farrikh Alzami, M.Kom",
+    page_icon="💰",
+    layout="wide",
+    initial_sidebar_state="collapsed"
+)
+import pandas as pd
+import numpy as np
+import joblib
+import plotly.express as px
+import plotly.graph_objects as go
+from datetime import datetime
+import json
+# Load model components
+@st.cache_resource
+def load_model():
+    """Load the trained model components"""
+    try:
+        components = joblib.load('income_prediction_components.joblib')
+        return components
+    except FileNotFoundError:
+        st.error("Model file 'income_prediction_components.joblib' not found!")
+        st.stop()
+    except Exception as e:
+        st.error(f"Error loading model: {str(e)}")
+        st.stop()
+def predict_income(data, model_components):
+    """Make income predictions using the trained model"""
+    # Convert to DataFrame if needed
+    if isinstance(data, dict):
+        df = pd.DataFrame([data])
+    else:
+        df = data.copy()
+    # Get components
+    model = model_components['model']
+    encoding_maps = model_components['encoding_maps']
+    feature_names = model_components['feature_names']
+    # Apply encodings to categorical columns
+    for column in df.columns:
+        if column in encoding_maps and column != 'income':
+            df[column] = df[column].map(encoding_maps[column])
+    # Ensure we only use features that the model was trained on
+    df_for_pred = df[feature_names].copy()
+    # Make prediction
+    prediction = model.predict(df_for_pred)[0]
+    probabilities = model.predict_proba(df_for_pred)[0]
+    # Get income label
+    income_map_inverse = {v: k for k, v in encoding_maps['income'].items()}
+    prediction_label = income_map_inverse[prediction]
+    return {
+        'prediction': int(prediction),
+        'prediction_label': prediction_label,
+        'probability': float(probabilities[prediction]),
+        'probabilities': probabilities.tolist()
+    }
+def validate_inputs(data):
+    """Validate input data"""
+    errors = []
+    # Age validation
+    if data['age'] < 17 or data['age'] > 90:
+        errors.append("Age should be between 17 and 90")
+    # Education number validation
+    if data['education_num'] < 1 or data['education_num'] > 16:
+        errors.append("Education number should be between 1 and 16")
+    # Hours per week validation
+    if data['hours_per_week'] < 1 or data['hours_per_week'] > 99:
+        errors.append("Hours per week should be between 1 and 99")
+    # Capital gain/loss validation
+    if data['capital_gain'] < 0 or data['capital_gain'] > 99999:
+        errors.append("Capital gain should be between 0 and 99999")
+    if data['capital_loss'] < 0 or data['capital_loss'] > 4356:
+        errors.append("Capital loss should be between 0 and 4356")
+    # Final weight validation
+    if data['fnlwgt'] < 12285 or data['fnlwgt'] > 1484705:
+        errors.append("Final weight should be between 12285 and 1484705")
+    return errors
+def export_prediction(data, result):
+    """Export prediction result to JSON"""
+    export_data = {
+        'timestamp': datetime.now().isoformat(),
+        'input_data': data,
+        'prediction': {
+            'class': result['prediction_label'],
+            'confidence': result['probability'],
+            'raw_prediction': result['prediction']
+        }
+    }
+    return json.dumps(export_data, indent=2)
+def reset_session_state():
+    """Reset all input values to default"""
+    keys_to_reset = [
+        'age', 'workclass', 'fnlwgt', 'education_num', 'marital_status',
+        'occupation', 'relationship', 'race', 'sex', 'capital_gain',
+        'capital_loss', 'hours_per_week', 'native_country'
+    ]
+    for key in keys_to_reset:
+        if key in st.session_state:
+            del st.session_state[key]
+# Load the model components at script start; load_model() calls st.stop()
+# when the file cannot be loaded, so nothing below runs without a model.
+model_components = load_model()
+# Choice lists for the categorical select boxes (from the original notebook).
+# NOTE(review): the selected strings are passed to predict_income(), which
+# maps them through the model's encoding maps, so unusual spellings such as
+# 'Columbia', 'Trinadad&Tobago', 'Hong' and 'Holand-Netherlands' presumably
+# have to stay exactly as written - confirm against the training data.
+workclass_options = ['State-gov', 'Self-emp-not-inc', 'Private', 'Federal-gov',
+                    'Local-gov', 'Self-emp-inc', 'Without-pay', 'Never-worked']
+marital_status_options = ['Never-married', 'Married-civ-spouse', 'Divorced',
+                         'Married-spouse-absent', 'Separated', 'Married-AF-spouse', 'Widowed']
+occupation_options = ['Adm-clerical', 'Exec-managerial', 'Handlers-cleaners', 'Prof-specialty',
+                     'Other-service', 'Sales', 'Craft-repair', 'Transport-moving',
+                     'Farming-fishing', 'Machine-op-inspct', 'Tech-support',
+                     'Protective-serv', 'Armed-Forces', 'Priv-house-serv']
+relationship_options = ['Not-in-family', 'Husband', 'Wife', 'Own-child', 'Unmarried', 'Other-relative']
+race_options = ['White', 'Black', 'Asian-Pac-Islander', 'Amer-Indian-Eskimo', 'Other']
+sex_options = ['Male', 'Female']
+native_country_options = ['United-States', 'Cuba', 'Jamaica', 'India', 'Mexico', 'South',
+                         'Puerto-Rico', 'Honduras', 'England', 'Canada', 'Germany', 'Iran',
+                         'Philippines', 'Italy', 'Poland', 'Columbia', 'Cambodia', 'Thailand', 'Ecuador',
+                         'Laos', 'Taiwan', 'Haiti', 'Portugal', 'Dominican-Republic', 'El-Salvador',
+                         'France', 'Guatemala', 'China', 'Japan', 'Yugoslavia', 'Peru',
+                         'Outlying-US(Guam-USVI-etc)', 'Scotland', 'Trinadad&Tobago', 'Greece',
+                         'Nicaragua', 'Vietnam', 'Hong', 'Ireland', 'Hungary', 'Holand-Netherlands']
+# Page header: title and a one-line description of the task
+st.title("💰 Income Prediction App - Dr. Eng. Farrikh Alzami, M.Kom")
+st.markdown("Predict whether income exceeds $50K/year based on demographic data")
+# Two columns with relative widths 2 and 1: the input form lives in col1,
+# prediction results and validation errors are written into col2 further down
+col1, col2 = st.columns([2, 1])
+with col1:
+    st.subheader("📝 Input Features")
+    # All inputs are grouped in one form. Each widget key matches a name in
+    # reset_session_state(), which is how the Reset button clears the inputs.
+    with st.form("prediction_form"):
+        # Demographic Information
+        st.markdown("**Demographic Information**")
+        col_demo1, col_demo2 = st.columns(2)
+        with col_demo1:
+            # min/max here mirror the range enforced again in validate_inputs()
+            age = st.number_input("Age", min_value=17, max_value=90, value=39, key="age")
+            sex = st.selectbox("Sex", sex_options, key="sex")
+            race = st.selectbox("Race", race_options, key="race")
+        with col_demo2:
+            marital_status = st.selectbox("Marital Status", marital_status_options, key="marital_status")
+            relationship = st.selectbox("Relationship", relationship_options, key="relationship")
+            native_country = st.selectbox("Native Country", native_country_options, key="native_country")
+        st.divider()
+        # Work Information
+        st.markdown("**Work Information**")
+        col_work1, col_work2 = st.columns(2)
+        with col_work1:
+            workclass = st.selectbox("Work Class", workclass_options, key="workclass")
+            occupation = st.selectbox("Occupation", occupation_options, key="occupation")
+            hours_per_week = st.number_input("Hours per Week", min_value=1, max_value=99, value=40, key="hours_per_week")
+        with col_work2:
+            # Bounds below also match the ranges checked in validate_inputs()
+            education_num = st.number_input("Education Level (Years)", min_value=1, max_value=16, value=10, key="education_num")
+            fnlwgt = st.number_input("Final Weight", min_value=12285, max_value=1484705, value=77516, key="fnlwgt")
+        st.divider()
+        # Financial Information
+        st.markdown("**Financial Information**")
+        col_fin1, col_fin2 = st.columns(2)
+        with col_fin1:
+            capital_gain = st.number_input("Capital Gain", min_value=0, max_value=99999, value=0, key="capital_gain")
+        with col_fin2:
+            capital_loss = st.number_input("Capital Loss", min_value=0, max_value=4356, value=0, key="capital_loss")
+        # Three form submit buttons side by side; the returned flags are
+        # inspected after the form to decide which action to run
+        col_btn1, col_btn2, col_btn3 = st.columns(3)
+        with col_btn1:
+            predict_button = st.form_submit_button("🔮 Predict", type="primary")
+        with col_btn2:
+            reset_button = st.form_submit_button("🔄 Reset")
+        with col_btn3:
+            export_button = st.form_submit_button("📤 Export Last Result")
+# Reset: drop the stored widget values, then rerun the script immediately
+if reset_button:
+    reset_session_state()
+    st.rerun()
+# Handle prediction
+if predict_button:
+    # Collect input data
+    input_data = {
+        'age': age,
+        'workclass': workclass,
+        'fnlwgt': fnlwgt,
+        'education_num': education_num,
+        'marital_status': marital_status,
+        'occupation': occupation,
+        'relationship': relationship,
+        'race': race,
+        'sex': sex,
+        'capital_gain': capital_gain,
+        'capital_loss': capital_loss,
+        'hours_per_week': hours_per_week,
+        'native_country': native_country
+    }
+    # Validate inputs
+    validation_errors = validate_inputs(input_data)
+    if validation_errors:
+        with col2:
+            st.error("❌ Validation Errors:")
+            for error in validation_errors:
+                st.error(f"• {error}")
+    else:
+        # Make prediction
+        try:
+            result = predict_income(input_data, model_components)
+            # Store result in session state for export
+            st.session_state['last_prediction'] = {
+                'input_data': input_data,
+                'result': result
+            }
+            with col2:
+                st.subheader("🎯 Prediction Results")
+                # Display prediction
+                prediction_color = "green" if result['prediction_label'] == '>50K' else "orange"
+                st.markdown(f"**Predicted Income:** :{prediction_color}[{result['prediction_label']}]")
+                # Confidence level with gauge
+                confidence = result['probability'] * 100
+                fig_gauge = go.Figure(go.Indicator(
+                    mode = "gauge+number+delta",
+                    value = confidence,
+                    domain = {'x': [0, 1], 'y': [0, 1]},
+                    title = {'text': "Confidence Level (%)"},
+                    gauge = {
+                        'axis': {'range': [None, 100]},
+                        'bar': {'color': prediction_color},
+                        'steps': [
+                            {'range': [0, 50], 'color': "lightgray"},
+                            {'range': [50, 80], 'color': "yellow"},
+                            {'range': [80, 100], 'color': "lightgreen"}
+                        ],
+                        'threshold': {
+                            'line': {'color': "red", 'width': 4},
+                            'thickness': 0.75,
+                            'value': 90
+                        }
+                    }
+                ))
+                fig_gauge.update_layout(height=300, margin=dict(l=20, r=20, t=40, b=20))
+                st.plotly_chart(fig_gauge, use_container_width=True)
+                # Probability breakdown
+                prob_df = pd.DataFrame({
+                    'Class': ['≤50K', '>50K'],
+                    'Probability': result['probabilities']
+                })
+                fig_bar = px.bar(
+                    prob_df,
+                    x='Class',
+                    y='Probability',
+                    title='Probability Distribution',
+                    color='Probability',
+                    color_continuous_scale=['orange', 'green']
+                )
+                fig_bar.update_layout(height=300, margin=dict(l=20, r=20, t=40, b=20))
+                st.plotly_chart(fig_bar, use_container_width=True)
+        except Exception as e:
+            with col2:
+                st.error(f"❌ Prediction Error: {str(e)}")
+# Feature Importance section: not gated on any form button, so it is
+# rendered whenever the script reaches this point
+st.subheader("📊 Feature Importance")
+if 'model' in model_components:
+    try:
+        feature_names = model_components['feature_names']
+        # Read the importances from the model; if the attribute is missing,
+        # the except branch below reports the error
+        feature_importance = model_components['model'].feature_importances_
+        # Sorted ascending - presumably so the most important feature ends
+        # up at the top of the horizontal bar chart
+        importance_df = pd.DataFrame({
+            'Feature': feature_names,
+            'Importance': feature_importance
+        }).sort_values('Importance', ascending=True)
+        # NOTE(review): the title hard-codes "Decision Tree Model" - confirm
+        # that this matches the model stored in the joblib file
+        fig_importance = px.bar(
+            importance_df,
+            x='Importance',
+            y='Feature',
+            orientation='h',
+            title='Feature Importance in Decision Tree Model',
+            color='Importance',
+            color_continuous_scale='viridis'
+        )
+        fig_importance.update_layout(height=400, margin=dict(l=20, r=20, t=40, b=20))
+        st.plotly_chart(fig_importance, use_container_width=True)
+    except Exception as e:
+        st.error(f"Error displaying feature importance: {str(e)}")
+# Handle export: the download button is only rendered in the run triggered by
+# "Export Last Result", and only if a prediction was stored in session state
+if export_button:
+    if 'last_prediction' in st.session_state:
+        export_data = export_prediction(
+            st.session_state['last_prediction']['input_data'],
+            st.session_state['last_prediction']['result']
+        )
+        # The file name carries the time at which the button is rendered
+        st.download_button(
+            label="📥 Download Prediction Results",
+            data=export_data,
+            file_name=f"income_prediction_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
+            mime="application/json"
+        )
+    else:
+        st.warning("⚠️ No prediction results to export. Please make a prediction first.")
+# Footer: horizontal rule followed by the credit line
+st.markdown("---")
+st.markdown("*Built with Streamlit • Dr. Eng. Farrikh Alzami, M.Kom*")