| |
| import pandas as pd |
| import numpy as np |
| import seaborn as sns |
| import matplotlib.pyplot as plt |
| import plotly.express as px |
| import plotly.graph_objects as go |
| import statsmodels.formula.api as smf |
| import itertools |
| import io |
| import gradio as gr |
| import warnings |
| warnings.filterwarnings("ignore") |
|
|
|
|
| |
# Load the insurance-cost dataset. A failed load is reported and leaves
# `seguros` as None, so the rest of the module can detect the missing data
# instead of failing with a NameError on an unbound name.
try:
    seguros = pd.read_csv('Costo_Seguro.csv')
except Exception as e:
    print(f"Fallo al cargar el archivo: {e}.")
    seguros = None

# Both models default to None; they are fitted only when the data loaded.
modelo_RLM = None
modelo_RLM_log = None

if seguros is not None:
    # Multiple linear regression on the raw cost.
    formula = 'seguro ~ edad + imc + C(genero) + C(hijos) + C(fumador) + C(region)'
    modelo_RLM = smf.ols(formula, data=seguros).fit()

    # Same predictors with a log-transformed response.
    formula = 'np.log(seguro) ~ edad + imc + C(genero) + C(hijos) + C(fumador) + C(region)'
    modelo_RLM_log = smf.ols(formula, data=seguros).fit()
|
|
| |
| |
|
|
| |
| |
| |
|
|
| |
|
|
def obtener_info_dataset():
    """Summarise the module-level ``seguros`` DataFrame as Markdown strings.

    Returns:
        A 3-tuple of strings: the row/column counts, the first 10 rows as a
        Markdown table (index omitted), and the ``describe()`` statistics as
        a Markdown table.
    """
    n_filas, n_columnas = seguros.shape

    return (
        f"**{n_filas}** filas, **{n_columnas}** columnas",
        seguros.head(10).to_markdown(index=False),
        seguros.describe().to_markdown(),
    )
|
|
|
|
def generar_graficos():
    """Build the six exploratory Plotly charts of the ``seguros`` dataset.

    Returns:
        A 6-tuple of Plotly figures, in order: histogram of ``seguro``; box
        plots of ``seguro`` by ``genero``, ``fumador`` and ``region``; and
        scatter plots of ``seguro`` against ``edad`` and ``imc``.
    """
    # Accented characters in titles/labels were stored as mojibake
    # (UTF-8 bytes decoded with the wrong codec); they are restored here.
    plt1 = px.histogram(
        seguros, x='seguro', nbins=50,
        title="Histograma del Costo del Seguro",
        labels={'seguro': 'Costo del Seguro'},
    )

    plt2 = px.box(
        seguros, x='genero', y='seguro',
        title="Distribución del Costo del Seguro por Género",
        labels={'genero': 'Género del Asegurado', 'seguro': 'Costo del Seguro'},
    )

    plt3 = px.box(
        seguros, x='fumador', y='seguro',
        title="Distribución del Costo del Seguro por Condición de Fumador",
        labels={'fumador': 'Condición de Fumador del Asegurado', 'seguro': 'Costo del Seguro'},
    )

    plt4 = px.box(
        seguros, x='region', y='seguro',
        title="Distribución del Costo del Seguro por Regiones",
        labels={'region': 'Región de Origen del Asegurado', 'seguro': 'Costo del Seguro'},
    )

    plt5 = px.scatter(
        seguros, x='edad', y='seguro',
        title="Edad vs. Costo del Seguro",
        labels={'edad': 'Edad del Asegurado', 'seguro': 'Costo del Seguro'},
    )

    plt6 = px.scatter(
        seguros, x='imc', y='seguro',
        title="IMC vs. Costo del Seguro",
        labels={'imc': 'Índice de Masa Corporal del Asegurado', 'seguro': 'Costo del Seguro'},
    )

    return plt1, plt2, plt3, plt4, plt5, plt6
|
|
|
|
def _ajustar_rls(predictor, etiqueta):
    """Fit ``seguro ~ <predictor>`` by OLS and build its report pieces.

    Args:
        predictor: Column name in ``seguros`` used as the single regressor.
        etiqueta: Display name of the predictor, used in the equation text
            and as the x-axis title.

    Returns:
        A 4-tuple ``(resumen, metricas, ecuacion, figura)``: the statsmodels
        summary inside a fenced code block, an HTML string with R², adjusted
        R² and RMSE, an HTML string with the fitted line equation, and a
        Plotly figure with the data points and the fitted line.
    """
    modelo = smf.ols(f'seguro ~ {predictor}', data=seguros).fit()
    resumen = f"```\n{modelo.summary().as_text()}\n```"

    r2 = modelo.rsquared
    adj = modelo.rsquared_adj
    rmse = np.sqrt(np.mean(modelo.resid**2))
    metricas = f"<b>Métricas:</b><br><b>R² = {r2:.4f}</b><br><b>R² Ajustado = {adj:.4f}</b><br><b>RMSE = {rmse:.2f}</b>"

    # params holds the intercept first, then the predictor's slope.
    b0, b1 = modelo.params
    ecuacion = f"<b>Ecuación de la Recta:</b><br><b>Costo del Seguro = {b0:.2f} + {b1:.2f} * {etiqueta}</b>"

    # Evaluate the fitted line on 100 evenly spaced points over the
    # observed range of the predictor.
    grid = np.linspace(seguros[predictor].min(), seguros[predictor].max(), 100)
    preds = modelo.predict(pd.DataFrame({predictor: grid}))
    figura = go.Figure([
        go.Scatter(x=seguros[predictor], y=seguros['seguro'], mode='markers', name='Datos'),
        go.Scatter(x=grid, y=preds, mode='lines', name='Recta'),
    ])
    figura.update_layout(xaxis_title=etiqueta, yaxis_title='Costo del Seguro')

    return resumen, metricas, ecuacion, figura


def calcular_RLS():
    """Fit the two simple regressions: cost vs. age and cost vs. BMI.

    Each regression is fitted independently, so the IMC section reports the
    IMC model's own summary (it previously repeated the Edad summary).

    Returns:
        An 8-tuple ``(summary, metrics, equation)`` for ``edad``, the same
        three strings for ``imc``, then the ``edad`` figure and the ``imc``
        figure.
    """
    data1_1, data1_2, data1_3, fig1 = _ajustar_rls('edad', 'Edad')
    data2_1, data2_2, data2_3, fig2 = _ajustar_rls('imc', 'IMC')

    return data1_1, data1_2, data1_3, data2_1, data2_2, data2_3, fig1, fig2
|
|
|
|
def calcular_RLM():
    """Report the multiple linear regression stored in ``modelo_RLM``.

    Returns:
        A 3-tuple of strings: the statsmodels summary inside a fenced code
        block, an HTML string with R², adjusted R² and RMSE, and an HTML
        string with the regression equation. If ``modelo_RLM`` is None, the
        first string carries an error message and the other two are empty.
    """
    if modelo_RLM is None:
        return "ERROR: El Modelo de RLM no está disponible.", "", ""

    summary_text = modelo_RLM.summary().as_text()
    data1 = f"```\n{summary_text}\n```"

    r2 = modelo_RLM.rsquared
    adj = modelo_RLM.rsquared_adj
    rmse = np.sqrt(np.mean(modelo_RLM.resid**2))
    data2 = f"<b>Métricas:</b><br><b>R² = {r2:.4f}</b><br><b>R² Ajustado = {adj:.4f}</b><br><b>RMSE = {rmse:.2f}</b>"

    # Equation: intercept first, then every other coefficient with an
    # explicit sign. The "*" is surrounded by spaces because this text is
    # rendered as Markdown, where "a*b ... c*d" would be read as emphasis
    # and the asterisks would disappear.
    coef_orig = modelo_RLM.params
    terms = [f"{coef_orig['Intercept']:.2f}"]
    terms.extend(
        f"{coef:+.2f} * {name}"
        for name, coef in coef_orig.items()
        if name != 'Intercept'
    )
    data3 = "Costo del Seguro = " + " ".join(terms)
    data3 = f"<b>{data3}</b>"

    return data1, data2, data3
|
|
|
|
def graficar_residuos():
    """Plot residuals vs. fitted values for the linear and log-cost models.

    The left panel uses ``modelo_RLM`` and the right panel uses
    ``modelo_RLM_log``; both draw a red horizontal line at zero.

    Returns:
        The matplotlib Figure holding both panels. The figure is removed
        from pyplot's registry before returning, so repeated calls do not
        accumulate open figures; the returned object can still be rendered.
    """
    fig, (ax_lineal, ax_log) = plt.subplots(1, 2, figsize=(12, 5))

    ax_lineal.scatter(modelo_RLM.fittedvalues, modelo_RLM.resid, alpha=0.5)
    ax_lineal.axhline(0, color='red')
    ax_lineal.set(title='Residuos vs Costo del Seguro Ajustado',
                  xlabel='Costo Ajustado', ylabel='Residuos')

    ax_log.scatter(modelo_RLM_log.fittedvalues, modelo_RLM_log.resid,
                   color='darkgreen', alpha=0.5)
    ax_log.axhline(0, color='red')
    ax_log.set(title='Residuos vs Logaritmo del Costo del Seguro Ajustado',
               xlabel='Logaritmo del Costo Ajustado', ylabel='Residuos')

    fig.tight_layout()

    # Return this specific figure rather than the pyplot module, whose
    # "current figure" is shared global state, and release it from pyplot.
    plt.close(fig)
    return fig
|
|
|
|
def predecir_costo(edad, genero, imc, hijos, fumador, region):
    """Predict the insurance cost for one person with both fitted models.

    Args:
        edad: Value for the ``edad`` column.
        genero: Value for the ``genero`` column.
        imc: Value for the ``imc`` column.
        hijos: Value for the ``hijos`` column.
        fumador: Value for the ``fumador`` column.
        region: Value for the ``region`` column.

    Returns:
        A 2-tuple of floats: the prediction of ``modelo_RLM``, and the
        prediction of ``modelo_RLM_log`` mapped back with ``exp``.

    Raises:
        gr.Error: If either model is unavailable. Raising keeps the number
            of returned values equal to the two output components; the
            previous guard returned three values.
    """
    if modelo_RLM is None or modelo_RLM_log is None:
        raise gr.Error("ERROR: El Modelo de RLM no está disponible.")

    # One-row frame whose column names match the terms of the model formula.
    nuevo = pd.DataFrame({
        'edad': [edad],
        'genero': [genero],
        'imc': [imc],
        'hijos': [hijos],
        'fumador': [fumador],
        'region': [region],
    })

    # Positional access avoids relying on the result's index labels.
    prediccion1 = float(modelo_RLM.predict(nuevo).iloc[0])

    # The log model predicts log(cost); exponentiate to get a cost.
    prediccion_log = modelo_RLM_log.predict(nuevo).iloc[0]
    prediccion2 = float(np.exp(prediccion_log))

    return prediccion1, prediccion2
|
|
| |
|
|
# Gradio interface: a header image followed by one tab per analysis step.
# Accented characters in the labels were stored as mojibake (UTF-8 bytes
# decoded with the wrong codec); they are restored here.
with gr.Blocks() as appweb:
    with gr.Row():
        gr.Image('encabezado.png', container=False)

    with gr.Tabs():

        # Tab: dataset overview, filled when the page loads.
        with gr.TabItem("Inicio") as tab_inicio:
            gr.Markdown("## DATASET DE COSTOS DE SEGUROS PERSONALES")
            gr.Markdown("### Estructura:")
            data1_output = gr.Markdown()
            gr.Markdown("### Primeras 10 filas:")
            data2_output = gr.Markdown()
            gr.Markdown("### Estadísticas Generales:")
            data3_output = gr.Markdown()

            appweb.load(fn=obtener_info_dataset, inputs=None,
                        outputs=[data1_output, data2_output, data3_output])

        # Tab: exploratory charts in a 3x2 grid, also filled on page load.
        with gr.TabItem("Gráficos") as tab_graficos:
            gr.Markdown("## VISUALIZACIÓN EXPLORATORIA DE DATOS")

            with gr.Row():
                plot1_output = gr.Plot(label=None, show_label=False)
                plot2_output = gr.Plot(label=None, show_label=False)
            with gr.Row():
                plot3_output = gr.Plot(label=None, show_label=False)
                plot4_output = gr.Plot(label=None, show_label=False)
            with gr.Row():
                plot5_output = gr.Plot(label=None, show_label=False)
                plot6_output = gr.Plot(label=None, show_label=False)

            appweb.load(fn=generar_graficos, inputs=None,
                        outputs=[plot1_output, plot2_output, plot3_output,
                                 plot4_output, plot5_output, plot6_output])

        # Tab: the two simple regressions, computed when the tab is selected.
        with gr.TabItem("Regresión Simple") as tab_reg_simple:
            gr.Markdown("# REGRESIONES LINEALES SIMPLES")

            gr.Markdown("## Costo del Seguro vs. Edad")
            reg11_output = gr.Markdown()
            with gr.Row():
                reg12_output = gr.Markdown()
                reg13_output = gr.Markdown()
            plotreg1_output = gr.Plot(label=None, show_label=False)

            gr.Markdown("## Costo del Seguro vs. IMC")
            reg21_output = gr.Markdown()
            with gr.Row():
                reg22_output = gr.Markdown()
                reg23_output = gr.Markdown()
            plotreg2_output = gr.Plot(label=None, show_label=False)

            tab_reg_simple.select(fn=calcular_RLS, inputs=None,
                                  outputs=[reg11_output, reg12_output, reg13_output,
                                           reg21_output, reg22_output, reg23_output,
                                           plotreg1_output, plotreg2_output])

        # Tab: multiple regression report, computed when the tab is selected.
        with gr.TabItem("Regresión Múltiple") as tab_reg_multiple:
            gr.Markdown("# REGRESIÓN LINEAL MÚLTIPLE")

            regm1_output = gr.Markdown()
            regm2_output = gr.Markdown()
            gr.Markdown("### Ecuación de la Regresión")
            regm3_output = gr.Markdown()

            tab_reg_multiple.select(fn=calcular_RLM, inputs=None,
                                    outputs=[regm1_output, regm2_output, regm3_output])

        # Tab: residual plots of the linear and log models.
        with gr.TabItem("Residuos") as tab_residuos:
            gr.Markdown("## COMPARACIÓN DE RESIDUOS ENTRE COSTO DEL SEGURO Y LOG(COSTO DEL SEGURO)")
            plotres_output = gr.Plot(label=None, show_label=False)

            tab_residuos.select(fn=graficar_residuos, inputs=None, outputs=plotres_output)

        # Tab: interactive prediction form.
        with gr.TabItem("Predicción de Costos"):
            gr.Markdown("## PREDICCIÓN DE COSTOS DE SEGURO PERSONAL")

            with gr.Row():
                input_edad = gr.Slider(minimum=18, maximum=80, step=1, value=30, label="Edad")
                input_genero = gr.Radio(choices=["femenino", "masculino"], label="Género", value="femenino")
            with gr.Row():
                input_imc = gr.Slider(minimum=15, maximum=60, step=0.5, value=30.0, label="Índice de Masa Corporal")
                input_fumador = gr.Radio(choices=["si", "no"], label="Fumador", value="no")
            with gr.Row():
                input_hijos = gr.Slider(minimum=0, maximum=5, step=1, value=0, label="Cantidad de Hijos")
                input_region = gr.Radio(choices=["NO", "NE", "SO", "SE"], label="Región", value="NO")

            gr.Markdown("---")

            with gr.Row():
                btn_predecir = gr.Button("Predecir Costo")
                output_costo = gr.Number(label="Costo del Seguro (Modelo Lineal)", precision=2, value=0.00)
                output_costo_log = gr.Number(label="Costo del Seguro (Modelo Logarítmico)", precision=2, value=0.00)

            # Input order must match the parameter order of predecir_costo.
            btn_predecir.click(fn=predecir_costo,
                               inputs=[input_edad, input_genero, input_imc, input_hijos,
                                       input_fumador, input_region],
                               outputs=[output_costo, output_costo_log])


# Serve on all network interfaces, port 7860.
appweb.launch(server_name="0.0.0.0", server_port=7860)