"""Plotly chart builders and short text summaries for real-estate listing data.

Every function takes a pandas DataFrame of listings. The columns read by this
module are 'Municipio', 'Categoria', 'Tipo', 'Precio', 'Fecha' and
'Amenidades' (each function only needs the ones it uses).
"""

import json
import unicodedata
from collections import Counter
from typing import Iterable, Optional

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Two-colour palette shared by the regular charts.
PRIMARY_COLOR_1 = "#e4ab0d"
PRIMARY_COLOR_2 = "#2A4A6B"

# Palette used only by the choropleth map and its placeholder figures.
_MAP_PRIMARY_COLOR = "#1b4a92"
_MAP_SECONDARY_COLOR = "#e4ab0d"
_MAP_BACKGROUND_COLOR = "#091b3f"
_MAP_TEXT_COLOR = "#e4ab0d"


def _count_amenities(amenity_lists: Iterable) -> Counter:
    """Count amenity occurrences across an iterable of per-listing lists.

    Entries that are not lists (e.g. NaN for listings without amenities) are
    skipped.
    """
    counter = Counter()
    for amenities in amenity_lists:
        if isinstance(amenities, list):
            counter.update(amenities)
    return counter


def _single_value(dataframe: pd.DataFrame, column: str):
    """Return the only distinct value of ``column``, or None.

    None is returned when the column is absent or does not hold exactly one
    distinct value.
    """
    if column not in dataframe.columns:
        return None
    distinct = dataframe[column].unique()
    return distinct[0] if len(distinct) == 1 else None


def _map_message_figure(message: str, title: str, font_size: int) -> go.Figure:
    """Build an empty map-styled figure showing ``message`` in its centre."""
    fig = go.Figure()
    fig.add_annotation(
        text=message,
        xref="paper",
        yref="paper",
        x=0.5,
        y=0.5,
        showarrow=False,
        font=dict(size=font_size, color=_MAP_TEXT_COLOR),
    )
    fig.update_layout(
        title=title,
        paper_bgcolor=_MAP_BACKGROUND_COLOR,
        plot_bgcolor=_MAP_BACKGROUND_COLOR,
        font=dict(color=_MAP_TEXT_COLOR),
    )
    return fig


def plot_properties_by_municipality(dataframe: pd.DataFrame, top_n: int = 10):
    """Return a horizontal bar chart of the ``top_n`` municipalities by listing count."""
    counts = dataframe['Municipio'].value_counts().reset_index()
    counts.columns = ['Municipio', 'Cantidad']
    top_counts = counts.head(top_n)

    fig = px.bar(
        top_counts,
        x='Cantidad',
        y='Municipio',
        orientation='h',
        title=f'Top {top_n} Municipios con más Propiedades',
        labels={'Cantidad': 'Número de Propiedades', 'Municipio': 'Municipio'},
        color='Cantidad',
        color_continuous_scale=[PRIMARY_COLOR_2, PRIMARY_COLOR_1],
    )
    return fig


def plot_category_distribution(dataframe: pd.DataFrame):
    """Return a donut chart of listing counts per 'Categoria' value."""
    counts = dataframe['Categoria'].value_counts().reset_index()
    counts.columns = ['Categoria', 'Cantidad']

    color_map = {
        'alquiler': PRIMARY_COLOR_1,
        'venta': PRIMARY_COLOR_2,
    }
    fig = px.pie(
        counts,
        values='Cantidad',
        names='Categoria',
        title='Proporción Alquiler vs Venta',
        hole=0.3,
        color='Categoria',
        color_discrete_map=color_map,
    )
    return fig


def plot_property_type_distribution(dataframe: pd.DataFrame):
    """Return a donut chart comparing 'casa' and 'apartamento' listing counts.

    Rows with any other 'Tipo' value are ignored.
    """
    filtered_data = dataframe[dataframe['Tipo'].isin(['casa', 'apartamento'])]
    counts = filtered_data['Tipo'].value_counts().reset_index()
    counts.columns = ['Tipo', 'Cantidad']

    color_map = {
        'casa': PRIMARY_COLOR_1,
        'apartamento': PRIMARY_COLOR_2,
    }
    fig = px.pie(
        counts,
        values='Cantidad',
        names='Tipo',
        title='Proporción Casas vs Apartamentos',
        hole=0.3,
        color='Tipo',
        color_discrete_map=color_map,
    )
    return fig


def plot_price_by_municipality(dataframe: pd.DataFrame):
    """Return grouped bars of mean and median 'Precio' per municipality.

    Municipalities are ordered by descending mean price.
    """
    price_data = (
        dataframe.groupby('Municipio')['Precio']
        .agg(['mean', 'median'])
        .reset_index()
        .sort_values('mean', ascending=False)
    )

    fig = px.bar(
        price_data,
        x='Municipio',
        y=['mean', 'median'],
        barmode='group',
        title='Precio Promedio y Mediano por Municipio',
        labels={'value': 'Precio (USD)', 'variable': 'Métrica'},
        color_discrete_sequence=[PRIMARY_COLOR_1, PRIMARY_COLOR_2],
    )
    return fig


def plot_price_trend_by_property_type(dataframe: pd.DataFrame):
    """Return a monthly median-price line per property type (casa / apartamento).

    'Fecha' values that cannot be parsed become NaT and drop out of the
    monthly grouping.
    """
    # Copy so the derived columns are written to an independent frame rather
    # than to a slice of the caller's DataFrame.
    filtered_data = dataframe[dataframe['Tipo'].isin(['casa', 'apartamento'])].copy()
    filtered_data['Fecha'] = pd.to_datetime(filtered_data['Fecha'], errors='coerce')
    filtered_data['Mes'] = filtered_data['Fecha'].dt.to_period('M').dt.to_timestamp()

    price_data = filtered_data.groupby(['Mes', 'Tipo'])['Precio'].median().reset_index()

    fig = px.line(
        price_data,
        x='Mes',
        y='Precio',
        color='Tipo',
        title='Evolución del Precio Mediano por Tipo de Propiedad',
        labels={'Precio': 'Precio Mediano (USD)', 'Mes': 'Fecha'},
        color_discrete_map={
            'casa': PRIMARY_COLOR_1,
            'apartamento': PRIMARY_COLOR_2,
        },
    )
    return fig


def plot_price_trend(dataframe: pd.DataFrame) -> Optional[go.Figure]:
    """Return a monthly median-price line chart for the given listings.

    Returns None when fewer than two monthly points are available, since a
    trend line needs at least two. The input DataFrame is not modified.
    """
    # Work on a private copy of the two needed columns so the caller's frame
    # keeps its original 'Fecha' dtype and gains no 'Mes' column.
    data = dataframe[['Fecha', 'Precio']].copy()
    if not pd.api.types.is_datetime64_any_dtype(data['Fecha']):
        data['Fecha'] = pd.to_datetime(data['Fecha'], errors='coerce')
    data['Mes'] = data['Fecha'].dt.to_period('M').dt.to_timestamp()

    price_data = data.groupby('Mes')['Precio'].median().reset_index()
    if len(price_data) < 2:
        return None

    fig = px.line(
        price_data,
        x='Mes',
        y='Precio',
        markers=True,
        title='Evolución del Precio Mediano',
        labels={'Precio': 'Precio Mediano (USD)', 'Mes': 'Fecha'},
        color_discrete_sequence=[PRIMARY_COLOR_1],
    )
    fig.update_traces(
        text=price_data['Precio'].apply(lambda x: f"${x:,.0f}"),
        textposition="top center",
        # <br> is Plotly's line break inside hover labels.
        hovertemplate="%{x|%b %Y}<br>Precio: $%{y:,.0f} USD",
    )
    fig.update_layout(
        hovermode="x unified",
        xaxis=dict(
            tickformat="%b %Y",
            tickmode='auto',
            nticks=min(12, len(price_data)),
        ),
    )
    return fig


def plot_amenities_by_property_type(dataframe: pd.DataFrame, top_n: int = 10):
    """Return grouped bars comparing amenity counts for houses vs apartments.

    The ``top_n`` amenities are chosen by their combined count across both
    property types, and each bar shows the real count for its type.
    """
    houses_counter = _count_amenities(
        dataframe.loc[dataframe['Tipo'] == 'casa', 'Amenidades']
    )
    apartments_counter = _count_amenities(
        dataframe.loc[dataframe['Tipo'] == 'apartamento', 'Amenidades']
    )

    # Rank over every amenity seen for either type. Truncating each type to
    # its own top-N first would report 0 for an amenity that merely missed the
    # other type's top-N. dict.fromkeys keeps first-seen order so ties are
    # resolved deterministically by the stable sort below.
    amenity_names = list(dict.fromkeys([*houses_counter, *apartments_counter]))
    comparison_data = pd.DataFrame(
        {
            'Amenidad': amenity_names,
            'Casas': [houses_counter[name] for name in amenity_names],
            'Apartamentos': [apartments_counter[name] for name in amenity_names],
        }
    )
    comparison_data['Total'] = comparison_data['Casas'] + comparison_data['Apartamentos']
    comparison_data = comparison_data.sort_values(
        'Total', ascending=False, kind='stable'
    ).head(top_n)

    fig = px.bar(
        comparison_data,
        x='Amenidad',
        y=['Casas', 'Apartamentos'],
        title=f'Top {top_n} Amenidades por Tipo de Propiedad',
        labels={'value': 'Número de Propiedades', 'Amenidad': 'Amenidad'},
        barmode='group',
        color_discrete_sequence=[PRIMARY_COLOR_1, PRIMARY_COLOR_2],
    )
    fig.update_layout(
        legend_title_text='Tipo de Propiedad',
        xaxis_tickangle=-45,
        height=500,
        margin=dict(l=50, r=50, t=80, b=150),
    )
    fig.update_traces(marker_line_color='white', marker_line_width=1)
    # update_traces applies one value to every trace, so the type label is
    # baked into each trace's own template from its name instead.
    for trace in fig.data:
        trace.hovertemplate = (
            f'%{{x}}<br>Tipo: {trace.name}<br>Propiedades: %{{y}}'
        )
    return fig


def plot_top_amenities_by_filters(dataframe: pd.DataFrame, top_n: int = 10) -> go.Figure:
    """Return a horizontal bar chart of the ``top_n`` most frequent amenities.

    The title adapts to the data: when all listings with amenities share a
    single 'Categoria' and/or 'Tipo', that value is named in the title.
    """
    # Explicit boolean Series so the mask is valid even for an empty frame.
    has_amenities = pd.Series(
        [isinstance(x, list) and len(x) > 0 for x in dataframe['Amenidades']],
        index=dataframe.index,
        dtype=bool,
    )
    filtered_data = dataframe[has_amenities]

    top_amenities = _count_amenities(filtered_data['Amenidades']).most_common(top_n)
    amenities_data = pd.DataFrame(top_amenities, columns=['Amenidad', 'Cantidad'])
    amenities_data = amenities_data.sort_values('Cantidad', ascending=True)

    category = _single_value(filtered_data, 'Categoria')
    property_type = _single_value(filtered_data, 'Tipo')
    category_label = 'Venta' if category == 'venta' else 'Alquiler'

    if category and property_type:
        title = f"Amenidades más Comunes en {property_type.capitalize()}s para {category_label}"
    elif category:
        title = f"Amenidades más Comunes en Propiedades para {category_label}"
    elif property_type:
        title = f"Amenidades más Comunes en {property_type.capitalize()}s"
    else:
        title = "Top Amenidades"

    fig = px.bar(
        amenities_data,
        x='Cantidad',
        y='Amenidad',
        orientation='h',
        title=title,
        labels={'Cantidad': 'Número de Propiedades', 'Amenidad': ''},
        color='Cantidad',
        color_continuous_scale=[PRIMARY_COLOR_2, PRIMARY_COLOR_1],
    )
    fig.update_layout(
        showlegend=False,
        height=500,
        margin=dict(l=150, r=50, t=80, b=50),
        yaxis={'categoryorder': 'total ascending'},
    )
    return fig


def get_top_amenities_description(dataframe: pd.DataFrame) -> str:
    """Return the three most frequent amenities as a comma-separated string.

    Returns a fixed "no data" message when the DataFrame is empty.
    """
    if dataframe.empty:
        return "No hay datos disponibles"

    amenities_counter = _count_amenities(dataframe['Amenidades'])
    return ", ".join(amenity for amenity, _ in amenities_counter.most_common(3))


def get_infrastructure_description(dataframe: pd.DataFrame) -> str:
    """Describe how often infrastructure amenities appear in the listings."""
    infrastructure_keywords = ['cisterna', 'tanque elevado', 'planta eléctrica', 'pozo']
    return describe_keywords_presence(dataframe, infrastructure_keywords, "infraestructura")


def get_spaces_description(dataframe: pd.DataFrame) -> str:
    """Describe how often outdoor-space amenities appear in the listings."""
    space_keywords = ['jardín', 'patio', 'terraza', 'balcón']
    return describe_keywords_presence(dataframe, space_keywords, "espacios")


def describe_keywords_presence(dataframe: pd.DataFrame, keywords: list, category_name: str) -> str:
    """Summarise which ``keywords`` appear in at least 10% of listings.

    A keyword counts once per listing whose 'Amenidades' list contains it
    exactly. The result lists the qualifying keywords with their share of all
    listings, most frequent first; a fallback sentence naming
    ``category_name`` is returned when none qualifies, and a fixed "no data"
    message when the DataFrame is empty.
    """
    if dataframe.empty:
        return "No hay datos disponibles"

    total_properties = len(dataframe)
    keyword_counts = {keyword: 0 for keyword in keywords}
    for amenities in dataframe['Amenidades']:
        if not isinstance(amenities, list):
            continue
        for keyword in keywords:
            if keyword in amenities:
                keyword_counts[keyword] += 1

    significant_keywords = {
        kw: count
        for kw, count in keyword_counts.items()
        if count / total_properties >= 0.1
    }
    if not significant_keywords:
        return f"ninguna característica de {category_name} destacada"

    sorted_keywords = sorted(significant_keywords.items(), key=lambda x: x[1], reverse=True)
    return ", ".join(f"{kw} ({count/total_properties:.0%})" for kw, count in sorted_keywords)


def plot_amenities_distribution(dataframe: pd.DataFrame, top_n: int = 15) -> Optional[go.Figure]:
    """Return a bar + donut figure of the ``top_n`` most frequent amenities.

    Returns None when no listing carries any amenity.
    """
    top_amenities = _count_amenities(dataframe['Amenidades']).most_common(top_n)
    amenities_data = pd.DataFrame(top_amenities, columns=['Amenidad', 'Cantidad'])
    if amenities_data.empty:
        return None

    # Only the traces of these intermediate figures are copied into the
    # combined figure, so no layout (title, size, margins) is set on them.
    fig_bar = px.bar(
        amenities_data.sort_values('Cantidad', ascending=True),
        x='Cantidad',
        y='Amenidad',
        orientation='h',
        labels={'Cantidad': 'Número de Propiedades', 'Amenidad': ''},
        color_discrete_sequence=[PRIMARY_COLOR_1],
    )
    fig_pie = px.pie(
        amenities_data,
        names='Amenidad',
        values='Cantidad',
        hole=0.3,
    )
    fig_pie.update_traces(
        textposition='inside',
        textinfo='percent+label',
        hovertemplate='%{label}<br>%{value} propiedades (%{percent})',
        marker=dict(colors=[PRIMARY_COLOR_1, PRIMARY_COLOR_2] + px.colors.sequential.Blues[2:]),
    )

    fig_final = make_subplots(
        rows=1,
        cols=2,
        specs=[[{"type": "bar"}, {"type": "pie"}]],
        subplot_titles=('Top Amenidades', 'Distribución Porcentual'),
        horizontal_spacing=0.1,
    )
    for trace in fig_bar.data:
        fig_final.add_trace(trace, row=1, col=1)
    fig_final.add_trace(fig_pie.data[0], row=1, col=2)

    fig_final.update_layout(
        title_text='Análisis de Amenidades',
        height=400,
        showlegend=False,
        margin=dict(t=100),
    )
    return fig_final


def plot_housing_construction(onei_data: dict) -> go.Figure:
    """Return a bar chart of finished dwellings per year.

    ``onei_data`` must hold a 'viviendas_terminadas' entry that pandas can
    turn into a DataFrame with 'año' and 'cantidad' columns.
    """
    construction_data = pd.DataFrame(onei_data['viviendas_terminadas'])

    fig = px.bar(
        construction_data,
        x='año',
        y='cantidad',
        title='Viviendas Terminadas en La Habana (2020-2024)',
        labels={'cantidad': 'Viviendas Terminadas', 'año': 'Año'},
        text='cantidad',
        color_discrete_sequence=[PRIMARY_COLOR_1],
    )
    fig.update_traces(
        textposition='outside',
        marker_line_color='black',
        marker_line_width=1,
    )
    fig.update_layout(
        # Headroom above the tallest bar so its outside label is not clipped.
        yaxis_range=[0, construction_data['cantidad'].max() + 1000],
        xaxis=dict(tickmode='linear'),
    )
    return fig


def remove_accents(input_str):
    """Return ``input_str`` with combining marks (accents, diaeresis) removed."""
    nfkd_form = unicodedata.normalize('NFKD', input_str)
    return "".join(c for c in nfkd_form if not unicodedata.combining(c))


def plot_habana_map(dataframe: pd.DataFrame, geojson_path: str, category: str) -> go.Figure:
    """Return a choropleth of median 'Precio' per municipality.

    Args:
        dataframe: Listings with 'Categoria', 'Municipio' and 'Precio' columns.
        geojson_path: Path to a GeoJSON file whose features carry a
            ``properties.municipality`` name.
        category: Either "Alquileres" or "Ventas".

    Returns:
        The choropleth figure, or a placeholder figure with a message when
        fewer than three usable listings exist or the GeoJSON cannot be loaded.

    Raises:
        KeyError: If ``category`` is not one of the two supported labels.
    """
    title = f"Precio Mediano de {category} por Municipio"
    cat_map = {"Alquileres": "alquiler", "Ventas": "venta"}

    # Listings without a municipality cannot be placed on the map (and would
    # break the accent normalisation), so they are discarded up front.
    filtered_data = (
        dataframe[dataframe["Categoria"] == cat_map[category]]
        .dropna(subset=["Municipio"])
        .copy()
    )
    if len(filtered_data) < 3:
        return _map_message_figure(
            "⚠️ No hay suficientes datos para mostrar este mapa", title, font_size=20
        )

    # Normalise names (no accents, lower case, trimmed) so they can be matched
    # against the identically normalised GeoJSON names below.
    filtered_data["Municipio"] = filtered_data["Municipio"].apply(
        lambda x: remove_accents(x).lower().strip()
    )
    median_price = filtered_data.groupby("Municipio", as_index=False)["Precio"].median()

    try:
        with open(geojson_path, encoding="utf-8") as f:
            geojson = json.load(f)
    except Exception as e:
        # Deliberately broad: any load failure is shown in the figure instead
        # of propagating to the caller.
        print(f"Error cargando GeoJSON: {e}")
        return _map_message_figure(f"Error cargando GeoJSON: {e}", title, font_size=15)

    for feature in geojson['features']:
        municipio_name = feature['properties']['municipality']
        feature['properties']['municipality_clean'] = remove_accents(municipio_name).lower().strip()

    fig = px.choropleth(
        median_price,
        geojson=geojson,
        locations="Municipio",
        featureidkey="properties.municipality_clean",
        color="Precio",
        color_continuous_scale=[_MAP_PRIMARY_COLOR, _MAP_SECONDARY_COLOR],  # blue-to-gold scale
        range_color=(median_price["Precio"].min(), median_price["Precio"].max()),
        labels={"Precio": "Precio Mediano (USD)"},
        title=title,
        hover_data={"Municipio": True, "Precio": ":.0f"},
    )
    fig.update_traces(
        hovertemplate="%{location}<br>Precio: $%{z:,.0f} USD"
    )
    fig.update_geos(
        visible=False,
        center={"lat": 23.1136, "lon": -82.3666},
        projection_scale=9,
        fitbounds="locations",
        bgcolor=_MAP_BACKGROUND_COLOR,
    )
    fig.update_layout(
        margin={"r": 0, "t": 60, "l": 0, "b": 0},
        height=550,
        coloraxis_colorbar=dict(
            title=dict(text="USD", side="right"),
            thickness=15,
            len=0.75,
            tickformat=",",
            tickprefix="$",
            yanchor="middle",
            y=0.5,
        ),
        paper_bgcolor=_MAP_BACKGROUND_COLOR,
        plot_bgcolor=_MAP_BACKGROUND_COLOR,
        font=dict(color=_MAP_TEXT_COLOR),
        title_font=dict(size=20, color=_MAP_SECONDARY_COLOR),
        annotations=[
            dict(
                x=0.5,
                y=-0.1,
                showarrow=False,
                text="Fuente: Análisis GAO | Datos 2024-2025",
                xref="paper",
                yref="paper",
                font=dict(size=12, color=_MAP_TEXT_COLOR),
            )
        ],
    )
    return fig