Spaces:
Build error
Build error
Alexis Galvis committed on
Commit ·
d790594
1
Parent(s): 84960aa
app
Browse files- app.py +96 -0
- data/output/loan_scores.pkl +3 -0
- models/loan_model_2.h5 +3 -0
- models/preprocessor.pkl +3 -0
- requirements.txt +0 -0
- utils/__pycache__/calculate_probability_prediction.cpython-310.pyc +0 -0
- utils/__pycache__/category_classification.cpython-310.pyc +0 -0
- utils/__pycache__/create_and_save_plot.cpython-310.pyc +0 -0
- utils/calculate_probability_prediction.py +48 -0
- utils/category_classification.py +15 -0
- utils/create_and_save_plot.py +21 -0
app.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import streamlit as st
|
| 3 |
+
import numpy as np
|
| 4 |
+
import tensorflow as tf
|
| 5 |
+
import joblib
|
| 6 |
+
import pandas as pd
|
| 7 |
+
|
| 8 |
+
from utils.create_and_save_plot import plot_credit_score_distribution
|
| 9 |
+
from utils.calculate_probability_prediction import probability_to_score_v3
|
| 10 |
+
from utils.category_classification import credit_score_range_classification
|
| 11 |
+
|
| 12 |
+
# Load the model and the preprocessor.
#
# Streamlit re-executes this script from the top on every widget interaction,
# so the artifacts are loaded through a cached helper: they are read from disk
# and deserialized once, then reused by later reruns.
@st.cache_resource
def _load_model_and_preprocessor():
    """Return the ``(model, preprocessor)`` pair used for scoring.

    The Keras model is read from ``models/loan_model_2.h5`` and the
    preprocessor from ``models/preprocessor.pkl``; both paths are relative to
    the current working directory.
    """
    # NOTE(review): joblib.load unpickles arbitrary Python objects. That is
    # acceptable only while preprocessor.pkl ships with the app and is trusted.
    loaded_model = tf.keras.models.load_model("models/loan_model_2.h5")
    loaded_preprocessor = joblib.load("models/preprocessor.pkl")
    return loaded_model, loaded_preprocessor


model, preprocessor = _load_model_and_preprocessor()
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def calculate_dti(annual_income, total_monthly_debt):
    """Return the debt-to-income ratio (DTI) as a percentage.

    The annual income is converted to a monthly figure (divided by 12) and
    the monthly debt is expressed as a percentage of it, rounded to two
    decimal places.

    Args:
        annual_income: Yearly income.
        total_monthly_debt: Sum of monthly debt payments.

    Returns:
        The rounded DTI percentage, or ``0`` when ``annual_income`` is not
        greater than zero (the ratio cannot be computed in that case).
    """
    # Guard clause: no positive income means there is nothing to divide by.
    if not annual_income > 0:
        return 0

    income_per_month = annual_income / 12
    return round(total_monthly_debt / income_per_month * 100, 2)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
# Streamlit interface
st.title("Predicción de Puntaje de Crédito")

st.sidebar.header("Ingrese los valores del préstamo")

# Sidebar input widgets; each one carries a help text describing the value.
annual_inc = st.sidebar.number_input(
    "Ingreso Anual (USD)", min_value=0.0, value=36000.0,
    help="(float) Ingreso anual en dólares antes de impuestos."
)
emp_length = st.sidebar.selectbox(
    "Tiempo en el Trabajo", ["< 1 year", "1-5 years", "6-10 years", "10+ years"],
    help="(categoría) Duración del empleo actual."
)
home_ownership = st.sidebar.selectbox(
    "Tipo de Propiedad", ["OWN", "MORTGAGE", "RENT"],
    help="(categoría) Tipo de propiedad del solicitante."
)
purpose = st.sidebar.selectbox(
    "Propósito del Préstamo", ["debt_consolidation", "credit_card", "home_improvement"],
    help="(categoría) Razón principal del préstamo."
)
zip_code = st.sidebar.number_input(
    "Código Postal", min_value=10000, max_value=99999, step=1, value=90210,
    help="(int) Código postal de residencia del solicitante."
)
open_acc = st.sidebar.number_input(
    "Cuentas Abiertas", min_value=0, step=1, value=5,
    help="(int) Número total de cuentas de crédito abiertas."
)
total_monthly_debt = st.sidebar.number_input(
    "Pagos Mensuales de Deuda (USD)", min_value=0.0, value=600.0,
    help="(float) Total de pagos mensuales de deuda (préstamos, tarjetas, hipotecas)."
)

# Compute the DTI from the two monetary inputs and show it in the sidebar.
dti = calculate_dti(annual_inc, total_monthly_debt)
st.sidebar.write(f"DTI Calculado: {dti}%")


@st.cache_data
def _load_reference_scores():
    """Return the reference scores stored in ``data/output/loan_scores.pkl``.

    Cached so the file is not re-read and unpickled on every button click.
    """
    # NOTE(review): joblib.load unpickles arbitrary objects; the file is
    # assumed to be a trusted artifact shipped with the app.
    return joblib.load(os.path.join('data', 'output', 'loan_scores.pkl'))


if st.sidebar.button("Predecir"):
    # Build a single-row DataFrame from the sidebar inputs.
    input_data = {
        "annual_inc": [annual_inc],
        "emp_length": [emp_length],
        "home_ownership": [home_ownership],
        "purpose": [purpose],
        "zip_code": [zip_code],
        "open_acc": [open_acc],
        "dti": [dti]
    }
    df = pd.DataFrame(input_data)

    # Preprocess the row, run the model and convert its output to a score.
    data_processed = preprocessor.transform(df)
    predictions = model.predict(data_processed).ravel()
    y_scores = _load_reference_scores()
    credit_score = probability_to_score_v3(predictions[0])

    # Show the result, coloured according to the score band.
    color = credit_score_range_classification(credit_score)
    st.markdown(
        f'<h2 style="color:{color};">Puntaje de Crédito Estimado: {credit_score:.2f}</h2>',
        unsafe_allow_html=True
    )

    # Render the distribution plot. The context manager closes the in-memory
    # buffer even if reading it raises.
    with plot_credit_score_distribution(y_scores, credit_score) as buffer:
        image_data = buffer.getvalue()

    st.image(image_data, caption="Distribución de Puntajes de Crédito", use_container_width=True)
|
data/output/loan_scores.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67b1ef0919f5cff5e4732fa33c14334d830acf50b622c32bd44a63f8e4f5f98c
|
| 3 |
+
size 474033
|
models/loan_model_2.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:24adab470382c8e80311421fe2b6fbd1cf73d665888a93b3a47303f55bd9a3e7
|
| 3 |
+
size 708384
|
models/preprocessor.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c2d49b06300e354172e421ff3e97c3c9fcc4cdafad346ad64e5da1275478b55
|
| 3 |
+
size 5130
|
requirements.txt
ADDED
|
Binary file (454 Bytes). View file
|
|
|
utils/__pycache__/calculate_probability_prediction.cpython-310.pyc
ADDED
|
Binary file (1.27 kB). View file
|
|
|
utils/__pycache__/category_classification.cpython-310.pyc
ADDED
|
Binary file (528 Bytes). View file
|
|
|
utils/__pycache__/create_and_save_plot.cpython-310.pyc
ADDED
|
Binary file (964 Bytes). View file
|
|
|
utils/calculate_probability_prediction.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def probability_to_score_v3(prob, base_score=300, max_score=850, threshold=0.326,
                            expansion_factor_low=3, expansion_factor_high=0.7,
                            mid_score=550):
    """
    Convert a default probability into a credit score using a non-linear
    expansion, so that scores spread out better towards the extremes.

    The probability is inverted (``1 - prob``) so that a higher value means a
    better score. Values at or above the inverted threshold are mapped onto
    ``[mid_score, max_score]``; values below it onto ``[base_score, mid_score]``.

    Args:
        prob (float): Probability of default. Values outside ``[0, 1]`` are
            clamped to that interval.
        base_score (int): Lowest score.
        max_score (int): Highest score.
        threshold (float): Cut-off probability separating the two bands.
        expansion_factor_low (float): Exponent applied in the lower band.
        expansion_factor_high (float): Exponent applied in the upper band.
        mid_score (int): Score assigned exactly at the threshold; boundary
            between the lower and upper bands.

    Returns:
        score (float): Credit score, clipped to ``[base_score, max_score]``.
    """
    # Clamp first: a negative normalized value raised to a fractional exponent
    # is not a real number. With the default exponents the clamped result is
    # the same as clipping the final score.
    prob = np.clip(prob, 0.0, 1.0)

    # Invert so that a larger value corresponds to a better score.
    inverted_prob = 1 - prob
    inverted_threshold = 1 - threshold

    if inverted_prob >= inverted_threshold:  # good customers
        band_width = 1 - inverted_threshold
        if band_width > 0:
            normalized = (inverted_prob - inverted_threshold) / band_width
        else:
            # Zero-width upper band (threshold == 0): avoid dividing by zero
            # and place the value at the top of the band.
            normalized = 1.0
        score = mid_score + (max_score - mid_score) * normalized ** expansion_factor_high
    else:  # bad customers
        # Here 0 <= inverted_prob < inverted_threshold, so the divisor is > 0.
        normalized = inverted_prob / inverted_threshold
        score = base_score + (mid_score - base_score) * normalized ** expansion_factor_low

    # Make sure the score stays inside the allowed range.
    return np.clip(score, base_score, max_score)
|
utils/category_classification.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
def credit_score_range_classification(credit_score):
    """Return the display color for a credit score.

    Bands are contiguous and half-open, so fractional scores such as 579.5
    always fall into exactly one band:

        score < 580         -> "red"
        580 <= score < 670  -> "orange"
        670 <= score < 740  -> "yellow"
        740 <= score < 800  -> "lightgreen"
        score >= 800        -> "green"

    Args:
        credit_score: Numeric credit score.

    Returns:
        The color name as a string.
    """
    # (exclusive upper bound, color), ordered from the lowest band upwards.
    bands = (
        (580, "red"),
        (670, "orange"),
        (740, "yellow"),
        (800, "lightgreen"),
    )
    for upper_bound, color in bands:
        if credit_score < upper_bound:
            return color
    return "green"
|
utils/create_and_save_plot.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import matplotlib.pyplot as plt
|
| 2 |
+
from io import BytesIO
|
| 3 |
+
|
| 4 |
+
def plot_credit_score_distribution(scores, point):
    """Render a histogram of ``scores`` with ``point`` highlighted, as a PNG.

    The histogram uses 30 bins and a logarithmic y axis. ``point`` is marked
    with a dashed vertical line and a dot drawn at y = 1.

    Args:
        scores: Values to histogram.
        point: Score to highlight on the x axis.

    Returns:
        A ``BytesIO`` positioned at offset 0 containing the PNG image. The
        caller is responsible for closing it.
    """
    # Use an explicit figure/axes pair instead of pyplot's implicit "current
    # figure", so drawing does not depend on shared global state.
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.hist(scores, bins=30, color='green', alpha=0.6)
        ax.set_yscale('log')
        ax.axvline(x=point, color='red', linestyle='--', label=f'Puntaje {point}')
        ax.scatter(point, 1, color='red', s=100, zorder=5)
        ax.set_title("Distribución de Puntajes de Crédito (300-850)")
        ax.set_xlabel("Puntaje")
        ax.set_ylabel("Frecuencia (escala logarítmica)")
        ax.grid(True)
        ax.legend()

        buffer = BytesIO()
        fig.savefig(buffer, format='png')
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)

    buffer.seek(0)  # rewind so the caller reads from the start
    return buffer
|