Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,173 +1,132 @@
|
|
| 1 |
-
# Vasculitis ANCA - App de Análisis Estadístico y Geoespacial (Unificada con PM2.5 y lista para Hugging Face)
|
| 2 |
-
|
| 3 |
import gradio as gr
|
| 4 |
import pandas as pd
|
| 5 |
import numpy as np
|
| 6 |
-
import seaborn as sns
|
| 7 |
import matplotlib.pyplot as plt
|
|
|
|
| 8 |
import plotly.express as px
|
| 9 |
import folium
|
| 10 |
from folium.plugins import HeatMap
|
| 11 |
from scipy.stats import chi2_contingency
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
df = df[~df['Documento de identidad'].isin(to_exclude)]
|
| 24 |
-
df = df[(df['Edad en años del paciente'] >= 0) & (df['Edad en años del paciente'] <= 120)]
|
| 25 |
-
|
| 26 |
-
# Mapeo de antecedentes
|
| 27 |
-
transform_dict = {
|
| 28 |
-
'Antecedente personal de diabetes': {0: 'No', 1: 'Sí'},
|
| 29 |
-
'Antecedente personal de falla cardíaca': {0: 'No', 1: 'Sí'},
|
| 30 |
-
'Antecedente personal de EPOC': {0: 'No', 1: 'Sí'},
|
| 31 |
-
'Antecedente personal de Hipertensión arterial': {0: 'No', 1: 'Sí'}
|
| 32 |
}
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
#
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
if df[x].dtype == 'object' and df[y].dtype == 'object':
|
| 55 |
-
|
| 56 |
-
elif df[x].dtype == 'object'
|
| 57 |
-
|
|
|
|
|
|
|
| 58 |
else:
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
def
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
fig.write_html("chi2_heatmap.html")
|
| 72 |
-
return f"Chi2 = {chi2:.2f}, p = {p:.4f}, gl = {dof}", table, "chi2_heatmap.html"
|
| 73 |
-
|
| 74 |
-
# Mapa de calor
|
| 75 |
-
def mapa_calor():
|
| 76 |
-
mapa = folium.Map(location=[4.7,-74.1], zoom_start=11)
|
| 77 |
-
puntos = df.dropna(subset=['Coordenada de residencia-latitud','Coordenada de residencia-longitud'])
|
| 78 |
-
heat = [[row['Coordenada de residencia-latitud'], row['Coordenada de residencia-longitud']] for _, row in puntos.iterrows()]
|
| 79 |
-
HeatMap(heat).add_to(mapa)
|
| 80 |
-
for _, row in puntos.iterrows():
|
| 81 |
folium.Marker(
|
| 82 |
-
location=[
|
| 83 |
-
popup=
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
f"<b>Creatinina:</b> {row.get('Creatinina', 'N/A')}"
|
| 88 |
-
)
|
| 89 |
-
).add_to(mapa)
|
| 90 |
-
mapa.save("mapa.html")
|
| 91 |
-
with open("mapa.html", 'r', encoding='utf-8') as f:
|
| 92 |
return f.read()
|
| 93 |
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
folium.CircleMarker(
|
| 99 |
-
location=[row['LATITUD'], row['LONGITUD']],
|
| 100 |
-
radius=7,
|
| 101 |
-
fill=True,
|
| 102 |
-
popup=f"{row['Estacion']} ({row['LOCALIDAD']}): {row['PM25_promedio_2019']} ug/m3",
|
| 103 |
-
color="red",
|
| 104 |
-
fill_opacity=0.7
|
| 105 |
-
).add_to(mapa)
|
| 106 |
-
mapa.save("pm25.html")
|
| 107 |
-
with open("pm25.html", 'r', encoding='utf-8') as f:
|
| 108 |
-
return f.read()
|
| 109 |
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
clustering = DBSCAN(eps=0.5, min_samples=3).fit(scaled)
|
| 116 |
-
puntos['Cluster'] = clustering.labels_
|
| 117 |
-
mapa = folium.Map(location=[4.7,-74.1], zoom_start=11)
|
| 118 |
-
for _, row in puntos.iterrows():
|
| 119 |
-
folium.CircleMarker(
|
| 120 |
-
location=[row['Coordenada de residencia-latitud'], row['Coordenada de residencia-longitud']],
|
| 121 |
-
radius=5,
|
| 122 |
-
fill=True,
|
| 123 |
-
color="#%06x" % (hash(row['Cluster']) & 0xFFFFFF),
|
| 124 |
-
popup=f"Cluster {row['Cluster']}"
|
| 125 |
-
).add_to(mapa)
|
| 126 |
-
mapa.save("cluster.html")
|
| 127 |
-
with open("cluster.html", 'r', encoding='utf-8') as f:
|
| 128 |
-
return f.read()
|
| 129 |
|
| 130 |
-
#
|
|
|
|
|
|
|
| 131 |
with gr.Blocks() as demo:
|
| 132 |
-
gr.Markdown("# Tablero Vasculitis ANCA
|
| 133 |
-
|
| 134 |
-
with gr.Tab("
|
| 135 |
-
gr.
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
gr.
|
|
|
|
| 151 |
|
| 152 |
with gr.Tab("Chi-cuadrado"):
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
htmlmap3 = gr.HTML()
|
| 166 |
-
gr.Button("Detectar Clústeres").click(mapa_cluster, outputs=htmlmap3)
|
| 167 |
-
|
| 168 |
-
with gr.Tab("Mapa Calidad Aire PM2.5"):
|
| 169 |
-
htmlmap4 = gr.HTML()
|
| 170 |
-
gr.Button("Visualizar PM2.5").click(mapa_pm, outputs=htmlmap4)
|
| 171 |
-
|
| 172 |
-
# Ejecutar app
|
| 173 |
-
demo.launch()
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
import numpy as np
|
|
|
|
| 4 |
import matplotlib.pyplot as plt
|
| 5 |
+
import seaborn as sns
|
| 6 |
import plotly.express as px
|
| 7 |
import folium
|
| 8 |
from folium.plugins import HeatMap
|
| 9 |
from scipy.stats import chi2_contingency
|
| 10 |
+
|
| 11 |
+
# ===========================
|
| 12 |
+
# 1. CARGA Y PREPROCESAMIENTO
|
| 13 |
+
# ===========================
|
| 14 |
+
# Load the registry export.  The absolute path below may not exist in every
# deployment, so when it is missing we fall back to a file with the same name
# located next to this script.  If neither exists, ``pd.read_csv`` raises
# FileNotFoundError for the original path, exactly as before.
from pathlib import Path

_CSV_PATH = Path("/mnt/data/VasculitisAsociadasA-BDD10jul24_DATA_2025-03-19_1033.csv")
if not _CSV_PATH.exists():
    _local_csv = Path(__file__).resolve().parent / _CSV_PATH.name
    if _local_csv.exists():
        _CSV_PATH = _local_csv
df = pd.read_csv(_CSV_PATH)

# Derive human-readable categorical columns from the coded columns.
# Codes that are not listed in a mapping become NaN.
categorias = {
    'genero_cat': df['genero'].map({0: 'Masculino', 1: 'Femenino'}),
    'regimen_cat': df['regimen'].map({1: 'Contributivo', 2: 'Subsidiado'}),
    'estrato_cat': df['estrato'].map({0: 'Bajo', 1: 'Bajo', 2: 'Bajo', 3: 'Medio', 4: 'Medio', 5: 'Alto', 6: 'Alto'}),
}
df = df.assign(**categorias)

# ANCA / MPO / PR3 status: anything other than 0 or 1 (including missing
# values) is labelled 'No definido'.
df['anca_cat'] = df['ancas'].map({0: 'Negativo', 1: 'Positivo'}).fillna('No definido')
df['mpo_cat'] = df['mpo'].map({0: 'Negativo', 1: 'Positivo'}).fillna('No definido')
df['pr3_cat'] = df['pr3'].map({0: 'Negativo', 1: 'Positivo'}).fillna('No definido')

# Renal involvement by biopsy: 'Sí' when the row sum over every column whose
# name starts with 'biopsia___' is greater than zero.
biopsia_cols = [col for col in df.columns if col.startswith('biopsia___')]
df['biopsia_positiva'] = np.where(df[biopsia_cols].sum(axis=1) > 0, 'Sí', 'No')

# Confirmed case: ANCA positive AND positive biopsy.
df['anca_y_renal'] = np.where((df['ancas'] == 1) & (df['biopsia_positiva'] == 'Sí'), 'Sí', 'No')

# Simulated dates for the temporal-trend view: one row every 30 days starting
# 2012-01-01, assigned in row order (these are not read from the data).
df['fecha_diag'] = pd.date_range(start='2012-01-01', periods=len(df), freq='30D')
|
| 38 |
+
|
| 39 |
+
# ===========================
|
| 40 |
+
# 2. FUNCIONES ANALÍTICAS
|
| 41 |
+
# ===========================
|
| 42 |
+
|
| 43 |
+
def plot_univariado(col):
    """Plot the distribution of one column of the module-level ``df``.

    Object-dtype columns are drawn as a count plot ordered by frequency with
    rotated x tick labels; any other dtype is drawn as a histogram with a KDE
    overlay.

    Args:
        col: Name of the column in ``df`` to plot.

    Returns:
        The path of the PNG that was written ("univariado.png").
    """
    # Use an explicit figure/axes instead of pyplot's implicit "current
    # figure" so the figure can be closed reliably after saving.
    fig, ax = plt.subplots(figsize=(8, 5))
    try:
        if df[col].dtype == 'object':
            sns.countplot(data=df, x=col, order=df[col].value_counts().index, ax=ax)
            ax.tick_params(axis='x', labelrotation=45)
        else:
            sns.histplot(df[col], kde=True, ax=ax)
        ax.set_title(f"Distribución de {col}")
        fig.tight_layout()
        fig.savefig("univariado.png")
    finally:
        # Without this every call would leave one more open figure behind.
        plt.close(fig)
    return "univariado.png"
|
| 54 |
+
|
| 55 |
+
def plot_bivariado(x, y):
    """Plot the relationship between two columns of the module-level ``df``.

    The chart type depends on which columns have object dtype:
    both -> grouped count plot; only one -> box plot with the object-dtype
    column on the x axis; neither -> scatter plot.

    Args:
        x: Name of the first column.
        y: Name of the second column.

    Returns:
        The path of the PNG that was written ("bivariado.png").
    """
    # Explicit figure/axes so the figure can be closed after saving instead
    # of accumulating in pyplot's global registry.
    fig, ax = plt.subplots(figsize=(8, 5))
    try:
        x_is_cat = df[x].dtype == 'object'
        y_is_cat = df[y].dtype == 'object'
        if x_is_cat and y_is_cat:
            sns.countplot(data=df, x=x, hue=y, ax=ax)
        elif x_is_cat:
            sns.boxplot(data=df, x=x, y=y, ax=ax)
        elif y_is_cat:
            # Swap the axes so the categorical column is always on x.
            sns.boxplot(data=df, x=y, y=x, ax=ax)
        else:
            sns.scatterplot(data=df, x=x, y=y, ax=ax)
        ax.tick_params(axis='x', labelrotation=45)
        fig.tight_layout()
        fig.savefig("bivariado.png")
    finally:
        plt.close(fig)
    return "bivariado.png"
|
| 69 |
+
|
| 70 |
+
def mapa():
    """Build a folium map with a heat layer and one marker per located case.

    Rows of the module-level ``df`` lacking 'latitud' or 'longitud' are
    ignored.  Each marker popup shows the row's 'id', 'anca_cat' and
    'biopsia_positiva' values.

    Returns:
        An HTML string embedding the map, suitable for a ``gr.HTML`` output.
    """
    m = folium.Map(location=[4.65, -74.1], zoom_start=11)
    casos = df.dropna(subset=['latitud', 'longitud'])
    HeatMap(casos[['latitud', 'longitud']].values.tolist()).add_to(m)
    for _, r in casos.iterrows():
        folium.Marker(
            location=[r['latitud'], r['longitud']],
            popup=f"ID: {r['id']}<br>ANCA: {r['anca_cat']}<br>Compromiso Renal: {r['biopsia_positiva']}",
        ).add_to(m)
    # Render in memory rather than writing and re-reading a fixed "mapa.html":
    # concurrent requests would otherwise race on the same file.
    # _repr_html_() wraps the document in an iframe so the map's scripts run
    # when the string is injected into the page.
    return m._repr_html_()
|
| 83 |
|
| 84 |
+
def chi2(var1, var2, data=None):
    """Run a chi-square test of independence between two columns.

    Args:
        var1: Name of the first column (rows of the contingency table).
        var2: Name of the second column (columns of the contingency table).
        data: Optional DataFrame to analyse; defaults to the module-level
            ``df`` when omitted.

    Returns:
        An HTML fragment with the statistic, p-value and degrees of freedom
        followed by the contingency table, or a short HTML message when a
        variable is not selected or the table is empty.
    """
    # A dropdown with no selection passes None; answer with a hint instead of
    # failing with a KeyError.
    if not var1 or not var2:
        return "<p>Seleccione dos variables categóricas.</p>"
    frame = df if data is None else data
    tab = pd.crosstab(frame[var1], frame[var2])
    # chi2_contingency raises on an empty table (e.g. no row has both values).
    if tab.empty:
        return "<p>No hay datos suficientes para calcular la prueba.</p>"
    chi, p, dof, _ = chi2_contingency(tab)
    return f"<h4>Chi2: {chi:.2f}, p-valor: {p:.4f}, gl: {dof}</h4><br>{tab.to_html()}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
|
| 89 |
+
def tendencia():
    """Return a plotly line chart of the number of rows per calendar month.

    Rows of the module-level ``df`` are grouped by the month of 'fecha_diag';
    the month is rendered as a string on the x axis and the row count on the
    y axis.
    """
    mes = df['fecha_diag'].dt.to_period('M')
    conteo = df.groupby(mes).size().reset_index(name='casos')
    # Periods are converted to strings so they can be used as axis labels.
    conteo['fecha'] = conteo['fecha_diag'].astype(str)
    return px.line(conteo, x='fecha', y='casos', title="Tendencia mensual de diagnósticos")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
+
# ===========================
|
| 96 |
+
# 3. INTERFAZ GRADIO AVANZADA
|
| 97 |
+
# ===========================
|
| 98 |
with gr.Blocks() as demo:
    gr.Markdown("# Tablero Avanzado de Vasculitis ANCA con Compromiso Renal")

    # Column lists shared by the dropdowns below.
    _todas = df.columns.tolist()
    _categoricas = df.select_dtypes('object').columns.tolist()

    # Tab 1: distribution of a single variable.
    with gr.Tab("Distribuciones Univariadas"):
        col_uni = gr.Dropdown(choices=_todas, label="Variable")
        btn_uni = gr.Button("Graficar")
        out_uni = gr.Image()
        btn_uni.click(fn=plot_univariado, inputs=col_uni, outputs=out_uni)

    # Tab 2: relationship between two variables.
    with gr.Tab("Relaciones Bivariadas"):
        col1 = gr.Dropdown(_todas, label="Variable 1")
        col2 = gr.Dropdown(_todas, label="Variable 2")
        btn_bi = gr.Button("Graficar")
        out_bi = gr.Image()
        btn_bi.click(fn=plot_bivariado, inputs=[col1, col2], outputs=out_bi)

    # Tab 3: heat map plus individual markers.
    with gr.Tab("Mapa Geoespacial"):
        gr.Markdown("Mapa de calor y puntos individuales")
        btn_map = gr.Button("Generar Mapa")
        out_map = gr.HTML()
        btn_map.click(fn=mapa, outputs=out_map)

    # Tab 4: chi-square test, restricted to object-dtype columns.
    with gr.Tab("Chi-cuadrado"):
        cat1 = gr.Dropdown(_categoricas, label="Categorica 1")
        cat2 = gr.Dropdown(_categoricas, label="Categorica 2")
        btn_chi = gr.Button("Calcular")
        out_chi = gr.HTML()
        btn_chi.click(fn=chi2, inputs=[cat1, cat2], outputs=out_chi)

    # Tab 5: monthly trend chart.
    with gr.Tab("Tendencia Temporal"):
        out_line = gr.Plot()
        btn_line = gr.Button("Mostrar Gráfico")
        btn_line.click(fn=tendencia, outputs=out_line)

demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|