File size: 2,917 Bytes
c4bf16e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
edda823
c4bf16e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3880f80
 
c4bf16e
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import streamlit as st
import pandas as pd
import numpy as np
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler,MinMaxScaler, OneHotEncoder
import shap
from streamlit_shap import st_shap

# Page configuration: title and icon for the Streamlit page
st.set_page_config(page_title="Kiva loan amount predictor", page_icon="💰")

# Main heading rendered at the top of the app
st.title("Predict Kiva loan amounts")


# Load model and preprocessing objects
@st.cache_resource
def load_model_objects():
    """Load the persisted model, scaler and one-hot encoder from disk.

    The files are read with ``joblib.load`` from paths relative to the
    current working directory. The function is wrapped in
    ``st.cache_resource`` so the loaded objects are reused rather than
    re-read on each script rerun.

    Returns:
        tuple: ``(model_rf, scaler, ohe)`` loaded from ``best_reg.joblib``,
        ``scaler.joblib`` and ``ohe.joblib`` respectively.
    """
    artifact_paths = ('best_reg.joblib', 'scaler.joblib', 'ohe.joblib')
    # The generator is consumed left to right, so files load in the listed order.
    model_rf, scaler, ohe = (joblib.load(path) for path in artifact_paths)
    return model_rf, scaler, ohe

model_rf, scaler, ohe = load_model_objects()


# Create SHAP explainer
@st.cache_resource
def load_explainer(_model):
    """Build the SHAP tree explainer for ``_model`` once and cache it.

    Streamlit re-executes the whole script on every widget interaction;
    without caching the explainer would be rebuilt each time even though
    the underlying model is already cached.

    Args:
        _model: Fitted tree-based model to explain. The leading underscore
            tells ``st.cache_resource`` not to hash this argument.

    Returns:
        shap.TreeExplainer: Explainer bound to ``_model``.
    """
    return shap.TreeExplainer(_model)


explainer = load_explainer(model_rf)

# App description (collapsed by default inside an expander)
with st.expander("What's this app?"):
    st.markdown("""
    This app helps you determine how much funding you can successfully raise on Kiva
    """)

# Heading for the input form that follows
st.subheader('Describe your loan request')

# User inputs, laid out in two side-by-side columns
col1, col2 = st.columns(2)

# Categorical inputs: the choices come straight from the fitted encoder's
# categories, indexed in the order sector, country, borrower_genders.
# NOTE: the capitalised variable names are kept because the prediction step
# reads them under exactly these names.
with col1:
    Sector = st.selectbox('sector', options=ohe.categories_[0])
    Country = st.selectbox('country', options=ohe.categories_[1])
    Gender = st.selectbox('borrower_genders', options=ohe.categories_[2])

# Numeric inputs: both start at 1 and cannot go below 1 (a zero-month loan
# term is not a meaningful request).
with col2:
    term_in_months = st.number_input('Length of loan in months', min_value=1, value=1)
    lender_count = st.number_input('Number of Lenders', min_value=1, value=1)

# Prediction button: everything below runs only on the rerun triggered by a click
if st.button('Predict loan amount 🚀'):
    # Prepare categorical features as a one-row frame, in the same column
    # order used to index the encoder's categories for the select boxes.
    cat_columns = ['sector', 'country', 'borrower_genders']
    cat_features = pd.DataFrame({'sector': [Sector], 'country': [Country], 'borrower_genders': [Gender]})
    cat_matrix = ohe.transform(cat_features)
    # The encoder returns a SciPy sparse matrix unless it was fitted with
    # sparse output disabled; densify only when needed, and use toarray()
    # so we get a plain ndarray rather than the legacy np.matrix type.
    if hasattr(cat_matrix, 'toarray'):
        cat_matrix = cat_matrix.toarray()
    cat_encoded = pd.DataFrame(cat_matrix, columns=ohe.get_feature_names_out(cat_columns))

    # Prepare numerical features and apply the fitted scaler
    num_features = pd.DataFrame({
        'term_in_months': [term_in_months],
        'lender_count': [lender_count],
    })
    num_scaled = pd.DataFrame(scaler.transform(num_features), columns=num_features.columns)

    # Combine features: scaled numeric columns first, then the one-hot columns
    # NOTE(review): assumes this matches the column order used at training time — confirm
    features = pd.concat([num_scaled, cat_encoded], axis=1)

    # Make prediction (single row in, so take the first element)
    predicted_amount = model_rf.predict(features)[0]

    # Display prediction; convert to a Python float first so round() always
    # yields an int and the label never renders as e.g. "123.0 USD".
    st.metric(label="Predicted loan amount", value=f'{round(float(predicted_amount))} USD')

    # SHAP explanation: force plot of per-feature contributions for this row
    st.subheader('Loan Amount Factors Explained 🤖')
    shap_values = explainer.shap_values(features)
    st_shap(shap.force_plot(explainer.expected_value, shap_values, features), height=400, width=600)

    st.markdown("""
    This plot shows how each feature contributes to the predicted loan amount:
    - Blue bars push the loan amount lower
    - Red bars push the loan amount higher
    - The length of each bar indicates the strength of the feature's impact
    """)

# Footer: a horizontal rule followed by the credit line
for footer_line in ("---", "Developed with ❤️ using Streamlit"):
    st.markdown(footer_line)