Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import plotly.express as px | |
| import json | |
| from collections import Counter | |
| from plotly.subplots import make_subplots | |
| import plotly.graph_objects as go | |
| import unicodedata | |
# Two-colour palette shared by the chart helpers in this module.
PRIMARY_COLOR_1 = "#e4ab0d"  # golden yellow
PRIMARY_COLOR_2 = "#2A4A6B"  # dark blue
def plot_properties_by_municipality(dataframe: pd.DataFrame, top_n: int = 10):
    """Build a horizontal bar chart of the municipalities with the most listings.

    Args:
        dataframe: Listings table; must contain a 'Municipio' column.
        top_n: Number of municipalities to keep, ranked by listing count.

    Returns:
        A Plotly Express bar figure. Bars are coloured by their count on a
        continuous scale running from PRIMARY_COLOR_2 to PRIMARY_COLOR_1.
    """
    ranking = dataframe['Municipio'].value_counts().reset_index()
    ranking.columns = ['Municipio', 'Cantidad']
    # value_counts() sorts descending, so the first rows are the top N.
    ranking = ranking.head(top_n)

    axis_labels = {
        'Cantidad': 'Número de Propiedades',
        'Municipio': 'Municipio',
    }
    return px.bar(
        ranking,
        x='Cantidad',
        y='Municipio',
        orientation='h',
        title=f'Top {top_n} Municipios con más Propiedades',
        labels=axis_labels,
        color='Cantidad',
        color_continuous_scale=[PRIMARY_COLOR_2, PRIMARY_COLOR_1],
    )
def plot_category_distribution(dataframe: pd.DataFrame):
    """Build a donut chart with the share of listings per 'Categoria' value.

    Args:
        dataframe: Listings table; must contain a 'Categoria' column.

    Returns:
        A Plotly Express pie figure (30% hole). The 'alquiler' and 'venta'
        categories are pinned to the two module palette colours.
    """
    tally = dataframe['Categoria'].value_counts().reset_index()
    tally.columns = ['Categoria', 'Cantidad']

    palette = dict(alquiler=PRIMARY_COLOR_1, venta=PRIMARY_COLOR_2)
    donut = px.pie(
        tally,
        names='Categoria',
        values='Cantidad',
        color='Categoria',
        color_discrete_map=palette,
        hole=0.3,
        title='Proporción Alquiler vs Venta',
    )
    return donut
def plot_property_type_distribution(dataframe: pd.DataFrame):
    """Build a donut chart comparing the number of houses and apartments.

    Rows whose 'Tipo' is neither 'casa' nor 'apartamento' are ignored.

    Args:
        dataframe: Listings table; must contain a 'Tipo' column.

    Returns:
        A Plotly Express pie figure (30% hole) with one slice per kept type.
    """
    is_house_or_apartment = dataframe['Tipo'].isin(['casa', 'apartamento'])
    tally = dataframe.loc[is_house_or_apartment, 'Tipo'].value_counts().reset_index()
    tally.columns = ['Tipo', 'Cantidad']

    palette = dict(casa=PRIMARY_COLOR_1, apartamento=PRIMARY_COLOR_2)
    donut = px.pie(
        tally,
        names='Tipo',
        values='Cantidad',
        color='Tipo',
        color_discrete_map=palette,
        hole=0.3,
        title='Proporción Casas vs Apartamentos',
    )
    return donut
def plot_price_by_municipality(dataframe: pd.DataFrame):
    """Plot the mean and median price per municipality as grouped bars.

    Args:
        dataframe: Listings table with 'Municipio' and 'Precio' columns.

    Returns:
        A Plotly Express bar figure with two bars per municipality ('mean'
        and 'median'), ordered by descending mean price.
    """
    per_municipality = dataframe.groupby('Municipio')['Precio']
    summary = (
        per_municipality.agg(['mean', 'median'])
        .reset_index()
        .sort_values('mean', ascending=False)
    )

    return px.bar(
        summary,
        x='Municipio',
        y=['mean', 'median'],
        barmode='group',
        title='Precio Promedio y Mediano por Municipio',
        labels={'value': 'Precio (USD)', 'variable': 'Métrica'},
        color_discrete_sequence=[PRIMARY_COLOR_1, PRIMARY_COLOR_2],
    )
def plot_price_trend_by_property_type(dataframe: pd.DataFrame):
    """Plot the monthly median price of houses and apartments over time.

    Only rows whose 'Tipo' is 'casa' or 'apartamento' are used. 'Fecha' is
    parsed with ``errors='coerce'``, so unparseable dates become NaT and are
    left out of the monthly grouping.

    Args:
        dataframe: Listings table with 'Tipo', 'Fecha' and 'Precio' columns.
            The caller's frame is not modified.

    Returns:
        A Plotly Express line figure with one line per property type.
    """
    # Take an explicit copy before adding columns: assigning onto a
    # boolean-mask slice triggers pandas' SettingWithCopyWarning.
    filtered_data = dataframe[dataframe['Tipo'].isin(['casa', 'apartamento'])].copy()
    filtered_data['Fecha'] = pd.to_datetime(filtered_data['Fecha'], errors='coerce')
    # Collapse every date to the first day of its month so it can be a group key.
    filtered_data['Mes'] = filtered_data['Fecha'].dt.to_period('M').dt.to_timestamp()

    price_data = (
        filtered_data.groupby(['Mes', 'Tipo'])['Precio'].median().reset_index()
    )

    fig = px.line(
        price_data,
        x='Mes',
        y='Precio',
        color='Tipo',
        title='Evolución del Precio Mediano por Tipo de Propiedad',
        labels={'Precio': 'Precio Mediano (USD)', 'Mes': 'Fecha'},
        color_discrete_map={
            'casa': PRIMARY_COLOR_1,
            'apartamento': PRIMARY_COLOR_2,
        },
    )
    return fig
def plot_price_trend(dataframe: pd.DataFrame):
    """Plot the monthly median price of the given (already filtered) listings.

    Args:
        dataframe: Listings table with 'Fecha' and 'Precio' columns. 'Fecha'
            is parsed with ``errors='coerce'`` when it is not already a
            datetime column. The caller's frame is not modified.

    Returns:
        A Plotly Express line figure with markers, or ``None`` when fewer
        than two months have data (a single point cannot form a trend).
    """
    # Derive the month key on local Series instead of writing 'Fecha'/'Mes'
    # back into the caller's DataFrame.
    dates = dataframe['Fecha']
    if not pd.api.types.is_datetime64_any_dtype(dates):
        dates = pd.to_datetime(dates, errors='coerce')
    months = dates.dt.to_period('M').dt.to_timestamp().rename('Mes')

    # Rows with NaT months are dropped by groupby's default behaviour.
    price_data = dataframe['Precio'].groupby(months).median().reset_index()
    if len(price_data) < 2:
        return None

    fig = px.line(
        price_data,
        x='Mes',
        y='Precio',
        markers=True,
        title='Evolución del Precio Mediano',
        labels={'Precio': 'Precio Mediano (USD)', 'Mes': 'Fecha'},
        color_discrete_sequence=[PRIMARY_COLOR_1],
    )
    # NOTE(review): the trace mode from px.line(markers=True) does not include
    # 'text', so these labels are presumably not drawn - confirm the intent.
    fig.update_traces(
        text=price_data['Precio'].apply(lambda x: f"${x:,.0f}"),
        textposition="top center",
        hovertemplate="<b>%{x|%b %Y}</b><br>Precio: $%{y:,.0f} USD",
    )
    fig.update_layout(
        hovermode="x unified",
        xaxis=dict(
            tickformat="%b %Y",
            tickmode='auto',
            # Cap the tick count at 12 so the axis stays readable.
            nticks=min(12, len(price_data)),
        ),
    )
    return fig
def plot_amenities_by_property_type(dataframe: pd.DataFrame, top_n: int = 10):
    """Compare how often the most common amenities appear in houses vs apartments.

    Only rows whose 'Tipo' is 'casa' or 'apartamento' and whose 'Amenidades'
    value is a list are counted. The ``top_n`` amenities are chosen by their
    combined count across both types, and each bar shows the full count for
    its type (not a count truncated to that type's own top-N).

    Args:
        dataframe: Listings table with 'Tipo' and 'Amenidades' columns.
        top_n: Number of amenities to display.

    Returns:
        A Plotly Express grouped bar figure with one 'Casas' and one
        'Apartamentos' bar per amenity.
    """
    series_by_type = {'casa': 'Casas', 'apartamento': 'Apartamentos'}
    counters = {series: Counter() for series in series_by_type.values()}
    for property_type, amenities in zip(dataframe['Tipo'], dataframe['Amenidades']):
        series = series_by_type.get(property_type)
        if series is not None and isinstance(amenities, list):
            counters[series].update(amenities)

    # Rank on the combined totals, then read each type's real count. Merging
    # two independently truncated top-N lists would report 0 for an amenity
    # that simply missed the other type's top-N.
    combined = counters['Casas'] + counters['Apartamentos']
    top_amenities = [amenity for amenity, _ in combined.most_common(top_n)]
    comparison_data = pd.DataFrame({
        'Amenidad': top_amenities,
        'Casas': [counters['Casas'][amenity] for amenity in top_amenities],
        'Apartamentos': [counters['Apartamentos'][amenity] for amenity in top_amenities],
    })

    fig = px.bar(
        comparison_data,
        x='Amenidad',
        y=['Casas', 'Apartamentos'],
        title=f'Top {top_n} Amenidades por Tipo de Propiedad',
        labels={'value': 'Número de Propiedades', 'Amenidad': 'Amenidad'},
        barmode='group',
        color_discrete_sequence=[PRIMARY_COLOR_1, PRIMARY_COLOR_2],
    )
    fig.update_layout(
        legend_title_text='Tipo de Propiedad',
        xaxis_tickangle=-45,
        height=500,
        margin=dict(l=50, r=50, t=80, b=150),
    )
    fig.update_traces(
        hovertemplate='<b>%{x}</b><br>Tipo: %{meta[0]}<br>Propiedades: %{y}',
        marker_line_color='white',
        marker_line_width=1,
    )
    # meta must be set per trace: update_traces() would give every trace the
    # same value, so both series would show the same type in the hover box.
    fig.for_each_trace(lambda trace: trace.update(meta=[trace.name]))
    return fig
def plot_top_amenities_by_filters(dataframe: pd.DataFrame, top_n: int = 10) -> go.Figure:
    """Plot the most frequent amenities, with a title that reflects the filters.

    Rows whose 'Amenidades' value is not a non-empty list are ignored. When
    the remaining rows share a single 'Categoria' and/or a single 'Tipo', the
    chart title names them; otherwise a generic title is used.

    Args:
        dataframe: Listings table with an 'Amenidades' column and, optionally,
            'Categoria' and 'Tipo' columns.
        top_n: Number of amenities to display.

    Returns:
        A horizontal Plotly bar figure coloured by count.
    """
    has_amenities = dataframe['Amenidades'].apply(
        lambda value: isinstance(value, list) and len(value) > 0
    )
    usable_rows = dataframe[has_amenities].copy()

    tally = Counter()
    for entry in usable_rows['Amenidades']:
        tally.update(entry)

    ranking = pd.DataFrame(tally.most_common(top_n), columns=['Amenidad', 'Cantidad'])
    ranking = ranking.sort_values('Cantidad', ascending=True)

    def _sole_value(column):
        # The column's value when it exists and holds exactly one distinct
        # value; None otherwise.
        if column not in usable_rows.columns:
            return None
        if len(usable_rows[column].unique()) != 1:
            return None
        return usable_rows[column].iloc[0]

    chart_title = "Top Amenidades"
    if len(usable_rows) > 0:
        category = _sole_value('Categoria')
        property_type = _sole_value('Tipo')
        if category and property_type:
            chart_title = f"Amenidades más Comunes en {property_type.capitalize()}s para {'Venta' if category == 'venta' else 'Alquiler'}"
        elif category:
            chart_title = f"Amenidades más Comunes en Propiedades para {'Venta' if category == 'venta' else 'Alquiler'}"
        elif property_type:
            chart_title = f"Amenidades más Comunes en {property_type.capitalize()}s"

    figure = px.bar(
        ranking,
        x='Cantidad',
        y='Amenidad',
        orientation='h',
        title=chart_title,
        labels={'Cantidad': 'Número de Propiedades', 'Amenidad': ''},
        color='Cantidad',
        color_continuous_scale=[PRIMARY_COLOR_2, PRIMARY_COLOR_1],
    )
    figure.update_layout(
        showlegend=False,
        height=500,
        margin=dict(l=150, r=50, t=80, b=50),
        yaxis={'categoryorder': 'total ascending'},
    )
    return figure
def get_top_amenities_description(dataframe: pd.DataFrame) -> str:
    """Return the three most frequent amenities as a comma-separated string.

    Args:
        dataframe: Listings table with an 'Amenidades' column; values that
            are not lists are skipped.

    Returns:
        The top three amenity names joined with ", " (fewer if fewer exist,
        an empty string if none), or a fixed "no data" message when the
        frame is empty.
    """
    if dataframe.empty:
        return "No hay datos disponibles"

    tally = Counter(
        amenity
        for entry in dataframe['Amenidades']
        if isinstance(entry, list)
        for amenity in entry
    )
    return ", ".join(name for name, _count in tally.most_common(3))
def get_infrastructure_description(dataframe: pd.DataFrame) -> str:
    """Describe how common the infrastructure-related amenities are.

    Delegates to ``describe_keywords_presence`` with a fixed keyword list
    (cistern, elevated tank, power plant, well) and returns its result.
    """
    return describe_keywords_presence(
        dataframe,
        keywords=['cisterna', 'tanque elevado', 'planta eléctrica', 'pozo'],
        category_name="infraestructura",
    )
def get_spaces_description(dataframe: pd.DataFrame) -> str:
    """Describe how common the outdoor-space amenities are.

    Delegates to ``describe_keywords_presence`` with a fixed keyword list
    (garden, patio, terrace, balcony) and returns its result.
    """
    return describe_keywords_presence(
        dataframe,
        keywords=['jardín', 'patio', 'terraza', 'balcón'],
        category_name="espacios",
    )
def describe_keywords_presence(dataframe: pd.DataFrame, keywords: list, category_name: str) -> str:
    """Summarise which of the given amenities appear in at least 10% of rows.

    Args:
        dataframe: Listings table with an 'Amenidades' column; values that
            are not lists are skipped but still count towards the total.
        keywords: Amenity names to look for (exact membership in each list).
        category_name: Label used in the message returned when no keyword
            reaches the threshold.

    Returns:
        "keyword (NN%)" items joined with ", " and ordered by descending
        frequency; a fixed "no data" message for an empty frame; or a
        "nothing notable" message naming ``category_name``.
    """
    if dataframe.empty:
        return "No hay datos disponibles"

    row_total = len(dataframe)
    hits = dict.fromkeys(keywords, 0)
    for entry in dataframe['Amenidades']:
        if not isinstance(entry, list):
            continue
        for keyword in keywords:
            # A bool adds as 0 or 1, so this counts the matching rows.
            hits[keyword] += keyword in entry

    notable = [
        (keyword, count)
        for keyword, count in hits.items()
        if count / row_total >= 0.1
    ]
    if not notable:
        return f"ninguna característica de {category_name} destacada"

    # Stable sort: ties keep the order in which the keywords were given.
    notable.sort(key=lambda pair: pair[1], reverse=True)
    return ", ".join(f"{kw} ({count/row_total:.0%})" for kw, count in notable)
def plot_amenities_distribution(dataframe: pd.DataFrame, top_n: int = 15) -> go.Figure:
    """Build a two-panel figure: amenity counts (bars) and their shares (donut).

    Args:
        dataframe: Listings table with an 'Amenidades' column; values that
            are not lists are skipped.
        top_n: Number of most frequent amenities to include.

    Returns:
        A subplot figure with a horizontal bar chart on the left and a donut
        chart on the right, or ``None`` when no amenity was counted.
    """
    tally = Counter()
    for entry in dataframe['Amenidades']:
        if not isinstance(entry, list):
            continue
        tally.update(entry)

    ranking = pd.DataFrame(tally.most_common(top_n), columns=['Amenidad', 'Cantidad'])
    if ranking.empty:
        return None

    bar_chart = px.bar(
        ranking.sort_values('Cantidad', ascending=True),
        x='Cantidad',
        y='Amenidad',
        orientation='h',
        title='Amenidades más Comunes',
        labels={'Cantidad': 'Número de Propiedades', 'Amenidad': ''},
        color_discrete_sequence=[PRIMARY_COLOR_1],
    )
    # Only the traces of this figure are copied into the combined figure
    # below; this layout stays on the standalone bar chart.
    bar_chart.update_layout(
        showlegend=False,
        height=500,
        margin=dict(l=100, r=50, t=80, b=50),
    )

    donut_chart = px.pie(
        ranking,
        names='Amenidad',
        values='Cantidad',
        title='Distribución de Amenidades',
        hole=0.3,
    )
    slice_colors = [PRIMARY_COLOR_1, PRIMARY_COLOR_2] + px.colors.sequential.Blues[2:]
    donut_chart.update_traces(
        textposition='inside',
        textinfo='percent+label',
        hovertemplate='<b>%{label}</b><br>%{value} propiedades (%{percent})',
        marker=dict(colors=slice_colors),
    )

    combined = make_subplots(
        rows=1,
        cols=2,
        specs=[[{"type": "bar"}, {"type": "pie"}]],
        subplot_titles=('Top Amenidades', 'Distribución Porcentual'),
        horizontal_spacing=0.1,
    )
    for bar_trace in bar_chart.data:
        combined.add_trace(bar_trace, row=1, col=1)
    combined.add_trace(donut_chart.data[0], row=1, col=2)

    combined.update_layout(
        title_text='Análisis de Amenidades',
        height=400,
        showlegend=False,
        margin=dict(t=100),
    )
    return combined
def plot_housing_construction(onei_data: dict) -> go.Figure:
    """Plot the number of finished dwellings per year as a bar chart.

    Args:
        onei_data: Mapping with a 'viviendas_terminadas' entry that pandas
            can turn into a table with 'año' and 'cantidad' columns.
            (Previously annotated with the ``json`` module, which is not a
            type; the code indexes it like a mapping.)

    Returns:
        A Plotly Express bar figure with the value printed above each bar.
    """
    construction_data = pd.DataFrame(onei_data['viviendas_terminadas'])

    fig = px.bar(
        construction_data,
        x='año',
        y='cantidad',
        title='Viviendas Terminadas en La Habana (2020-2024)',
        labels={'cantidad': 'Viviendas Terminadas', 'año': 'Año'},
        text='cantidad',
        color_discrete_sequence=[PRIMARY_COLOR_1],
    )
    fig.update_traces(
        textposition='outside',
        marker_line_color='black',
        marker_line_width=1,
    )
    # Extend the y axis past the tallest bar; presumably to leave room for
    # the 'outside' text labels.
    y_axis_top = construction_data['cantidad'].max() + 1000
    fig.update_layout(
        yaxis_range=[0, y_axis_top],
        xaxis=dict(tickmode='linear'),
    )
    return fig
def remove_accents(input_str):
    """Return ``input_str`` with combining marks (accents, diaeresis) removed.

    The text is decomposed with Unicode NFKD normalisation so that accented
    letters split into a base character plus combining marks, and the
    combining marks are then dropped.
    """
    decomposed = unicodedata.normalize('NFKD', input_str)
    kept_chars = (char for char in decomposed if unicodedata.combining(char) == 0)
    return "".join(kept_chars)
def plot_habana_map(dataframe: pd.DataFrame, geojson_path: str, category: str) -> go.Figure:
    """Draw a choropleth of the median price per municipality.

    Municipality names from the data and from the GeoJSON file are matched
    after stripping accents, lower-casing and trimming whitespace.

    Args:
        dataframe: Listings table with 'Categoria', 'Municipio' and 'Precio'
            columns. The caller's frame is not modified.
        geojson_path: Path to a GeoJSON file whose features carry a
            ``properties.municipality`` name.
        category: Either "Alquileres" or "Ventas"; selects the rows whose
            'Categoria' is 'alquiler' or 'venta' respectively.

    Returns:
        The choropleth figure. A placeholder figure with a centred message is
        returned instead when fewer than three usable rows remain or when the
        GeoJSON file cannot be loaded.

    Raises:
        KeyError: If ``category`` is not one of the two supported labels.
    """
    PRIMARY_COLOR = "#1b4a92"
    SECONDARY_COLOR = "#e4ab0d"
    BACKGROUND_COLOR = "#091b3f"
    TEXT_COLOR = "#e4ab0d"
    map_title = f"Precio Mediano de {category} por Municipio"

    def _message_figure(message: str, font_size: int) -> go.Figure:
        # Empty figure in the map's colours with one centred annotation.
        placeholder = go.Figure()
        placeholder.add_annotation(
            text=message,
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False,
            font=dict(size=font_size, color=TEXT_COLOR))
        placeholder.update_layout(
            title=map_title,
            paper_bgcolor=BACKGROUND_COLOR,
            plot_bgcolor=BACKGROUND_COLOR,
            font=dict(color=TEXT_COLOR))
        return placeholder

    cat_map = {"Alquileres": "alquiler", "Ventas": "venta"}
    filtered_data = dataframe[dataframe["Categoria"] == cat_map[category]]
    # Rows without a municipality cannot be placed on the map, and a NaN
    # would make remove_accents() fail; drop them before counting.
    filtered_data = filtered_data.dropna(subset=["Municipio"]).copy()
    if len(filtered_data) < 3:
        return _message_figure("⚠️ No hay suficientes datos para mostrar este mapa", 20)

    filtered_data["Municipio"] = filtered_data["Municipio"].map(
        lambda name: remove_accents(str(name)).lower().strip()
    )
    median_price = filtered_data.groupby("Municipio", as_index=False)["Precio"].median()

    try:
        with open(geojson_path, encoding="utf-8") as f:
            geojson = json.load(f)
    except Exception as e:
        # Deliberately broad: any load failure is reported in the figure
        # itself instead of propagating to the caller.
        print(f"Error cargando GeoJSON: {e}")
        return _message_figure(f"Error cargando GeoJSON: {str(e)}", 15)

    for feature in geojson['features']:
        properties = feature.get('properties') or {}
        municipio_name = properties.get('municipality')
        if not isinstance(municipio_name, str):
            # A feature without a usable name cannot be matched; skip it
            # rather than aborting the whole map.
            continue
        properties['municipality_clean'] = remove_accents(municipio_name).lower().strip()

    fig = px.choropleth(
        median_price,
        geojson=geojson,
        locations="Municipio",
        featureidkey="properties.municipality_clean",
        color="Precio",
        color_continuous_scale=[PRIMARY_COLOR, SECONDARY_COLOR],  # blue-to-gold scale
        range_color=(median_price["Precio"].min(), median_price["Precio"].max()),
        labels={"Precio": "Precio Mediano (USD)"},
        title=map_title,
        hover_data={"Municipio": True, "Precio": ":.0f"}
    )
    fig.update_traces(
        hovertemplate="<b>%{location}</b><br>Precio: $%{z:,.0f} USD<extra></extra>"
    )
    fig.update_geos(
        visible=False,
        center={"lat": 23.1136, "lon": -82.3666},
        projection_scale=9,
        fitbounds="locations",
        bgcolor=BACKGROUND_COLOR
    )
    fig.update_layout(
        margin={"r": 0, "t": 60, "l": 0, "b": 0},
        height=550,
        coloraxis_colorbar=dict(
            title="USD",
            thickness=15,
            len=0.75,
            tickformat=",",
            tickprefix="$",
            yanchor="middle",
            y=0.5
        ),
        paper_bgcolor=BACKGROUND_COLOR,
        plot_bgcolor=BACKGROUND_COLOR,
        font=dict(color=TEXT_COLOR),
        title_font=dict(size=20, color=SECONDARY_COLOR),
        coloraxis_colorbar_title_side="right",
        annotations=[
            dict(
                x=0.5,
                y=-0.1,
                showarrow=False,
                text="Fuente: Análisis GAO | Datos 2024-2025",
                xref="paper",
                yref="paper",
                font=dict(size=12, color=TEXT_COLOR))
        ]
    )
    return fig