Spaces:
Sleeping
Sleeping
# 🏠 MiamiHomeAI - Real Estate Price Predictor for Hugging Face Spaces
# AI-powered Miami real estate price prediction
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| from sklearn.ensemble import RandomForestRegressor | |
| import joblib | |
| from pathlib import Path | |
| import json | |
| import re | |
# Startup banner written to stdout when the module is imported.
print("🏠 MiamiHomeAI iniciando...")

# Configuration: zone labels, in the order they are listed to the user.
ZONES = [
    "Beachfront",
    "Coastal",
    "Downtown",
    "Suburban",
    "Waterfront",
    "Premium",
    "Luxury",
    "Urban",
    "Residential",
    "Commercial",
    "Urban-Coastal",
    "Brickell",
    "South Beach",
    "Coral Gables",
    "Hammock Lakes",
    "Riviera",
    "Rural",
]

# Configuration: flood-zone codes, in the order they are listed to the user.
FLOOD_ZONES = [
    "X",
    "AE",
    "VE",
    "AH",
    "AO",
]
class FeatureEngineer:
    """Feature engineering for the price model.

    The original author marks this as the exact version used at training
    time, so column names, the order in which columns are created, and
    every formula are kept unchanged.

    Both methods are static: they can be called on the class
    (``FeatureEngineer.create_features(df)``) or on an instance.
    """

    @staticmethod
    def _contains_any(series: pd.Series, keywords) -> pd.Series:
        """Return a boolean mask of elements containing any keyword.

        Matching is a case-insensitive substring search; keywords are
        regex-escaped, so they are matched literally. Missing values yield
        ``False``. An empty keyword collection yields an all-``False`` mask.
        """
        keywords = list(keywords)
        if not keywords:
            return pd.Series(False, index=series.index)
        pattern = '|'.join(re.escape(k.lower()) for k in keywords)
        return series.str.lower().str.contains(pattern, na=False)

    @staticmethod
    def create_features(df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``df`` with the derived feature columns appended.

        Reads the columns ``Size``, ``Rooms``, ``Bathrooms``, ``Age``,
        ``Zone``, ``School_Rating``, ``Beach_Distance`` and ``Flood_Zone``.
        The input frame is not modified.
        """
        df = df.copy()
        contains_any = FeatureEngineer._contains_any

        # Float views of the numeric inputs, converted once and reused.
        size = df['Size'].astype(float)
        age = df['Age'].astype(float)
        bd = df['Beach_Distance'].astype(float)
        sr = df['School_Rating'].astype(float)

        # ===== Basics =====
        # Small epsilon keeps the ratios finite when Rooms is 0.
        rooms_safe = df['Rooms'].astype(float) + 1e-6
        df['Size_per_Room'] = size / rooms_safe
        df['Bathroom_Ratio'] = df['Bathrooms'].astype(float) / rooms_safe
        df['Is_New'] = (age < 3).astype(np.uint8)
        # Flag for ages strictly between 20 and 25.
        df['Is_Renovated'] = ((age > 20) & (age < 25)).astype(np.uint8)
        df['Log_Size'] = np.log1p(size)
        df['Sqrt_Beach_Distance'] = np.sqrt(bd + 1.0)
        df['Inverse_Beach_Distance'] = 1.0 / (bd + 0.1)

        # ===== Geographic zones (substring flags on the zone label) =====
        z = df['Zone'].astype('string')
        coastal_premium_zones = ['Beachfront', 'Waterfront', 'Ocean View']
        df['Coastal_Premium'] = contains_any(z, coastal_premium_zones).astype(np.uint8)
        coastal_zones = ['Coastal', 'Beach', 'Bay']
        df['Coastal_Zone'] = contains_any(z, coastal_zones).astype(np.uint8)
        urban_premium_zones = ['Downtown', 'Brickell', 'Premium', 'Urban']
        df['Urban_Premium'] = contains_any(z, urban_premium_zones).astype(np.uint8)
        coral_gables_zones = [
            'Hammock Lakes', 'Cocoplum', 'Coral Gables', 'South Gables',
            'Gables Estates', 'Miracle Mile', 'Venetian Pool', 'University',
            'Old Cutler', 'Pinecrest',
        ]
        df['Coral_Gables_Zone'] = contains_any(z, coral_gables_zones).astype(np.uint8)
        inland_luxury_zones = ['Luxury', 'Estates', 'Country Club', 'Golf']
        # Luxury keywords count only when no coastal-premium keyword matches.
        df['Inland_Luxury'] = (
            contains_any(z, inland_luxury_zones)
            & ~contains_any(z, coastal_premium_zones)
        ).astype(np.uint8)
        residential_zones = ['Residential', 'Suburban', 'Neighborhood']
        df['Residential_Zone'] = contains_any(z, residential_zones).astype(np.uint8)

        # ===== Distances =====
        # np.select takes the first matching condition, so the tiers are
        # <=1, (1, 3], (3, 6] and everything beyond 6.
        df['Beach_Proximity_Premium'] = np.select(
            [bd <= 1, bd <= 3, bd <= 6],
            [1.5, 1.2, 1.0],
            default=0.8,
        )
        df['Distance_Adjusted_Premium'] = (
            df['Urban_Premium'] * np.where(bd > 8, 0.7, 1.0)
            + df['Coral_Gables_Zone'] * np.where(bd > 12, 0.9, 1.0)
        )

        # ===== Schools =====
        df['Excellent_Schools'] = (sr >= 8.5).astype(np.uint8)
        df['Good_Schools'] = ((sr >= 7.0) & (sr < 8.5)).astype(np.uint8)
        df['Average_Schools'] = (sr < 7.0).astype(np.uint8)
        df['School_Zone_Match'] = np.where(
            (df['Coral_Gables_Zone'] == 1) & (sr >= 8.0), 1.2,
            np.where((df['Coral_Gables_Zone'] == 1) & (sr < 6.5), 0.8, 1.0),
        )

        # ===== Flood risk =====
        flood_risk_mapping = {
            'X': 0,   # low
            'AE': 1,  # medium
            'VE': 2,  # high (coastal)
            'AH': 1,
            'AO': 1,
        }
        # Codes missing from the mapping fall back to 0.
        df['Flood_Risk'] = df['Flood_Zone'].map(flood_risk_mapping).fillna(0).astype(int)

        # ===== Interactions =====
        df['Coastal_Beach_Interaction'] = df['Coastal_Zone'] * df['Inverse_Beach_Distance']
        df['Premium_Beach_Interaction'] = df['Coastal_Premium'] * df['Beach_Proximity_Premium']
        df['Size_Coastal_Premium'] = size * df['Coastal_Premium']
        df['Size_Urban_Premium'] = size * df['Urban_Premium']
        df['Size_Coral_Gables'] = size * df['Coral_Gables_Zone']
        df['Coastal_Flood_Risk'] = df['Coastal_Zone'] * df['Flood_Risk']
        df['Size_Flood_Risk'] = size * df['Flood_Risk']
        df['New_Premium_Bonus'] = df['Is_New'] * (df['Coastal_Premium'] + df['Urban_Premium'])
        df['Old_Coral_Gables'] = ((df['Coral_Gables_Zone'] == 1) & (age > 30)).astype(np.uint8)
        return df
def load_model():
    """Load a trained model from the working directory, or fall back to demo mode.

    A fixed list of file names is tried in order; the first file that exists
    and loads wins. A file that fails to load is reported and skipped.

    Returns:
        A ``(model, loaded, metadata)`` tuple. When the loaded object is a
        dict with a ``'model'`` key, ``model`` is that entry and ``metadata``
        is its ``'metadata'`` entry (``{}`` if absent); otherwise the loaded
        object itself is the model and ``metadata`` is ``{}``. When nothing
        could be loaded the result is ``(None, False, {})``.
    """
    candidates = (
        "miami_premium_model_v3.joblib",
        "miami_premium_model.joblib",
        "miami_model.joblib",
        "real_estate_model.joblib",
    )
    for file_name in candidates:
        if not Path(file_name).exists():
            continue
        try:
            payload = joblib.load(file_name)
            is_bundle = isinstance(payload, dict) and 'model' in payload
            if not is_bundle:
                print(f"✅ Modelo simple cargado: {file_name}")
                return payload, True, {}
            print(f"✅ Modelo cargado: {file_name}")
            return payload['model'], True, payload.get('metadata', {})
        except Exception as e:
            # Best effort: report the failure and try the next candidate.
            print(f"❌ Error cargando {file_name}: {e}")
    print("⚠️ Modelo no encontrado, usando predicciones demo")
    return None, False, {}


# Load the model once, at import time.
model, model_loaded, metadata = load_model()
def _format_metric(value, spec, prefix=""):
    """Render a metadata metric, or ``'N/A'`` when it is missing or unformattable.

    Strings are passed through unchanged (without ``prefix``); anything else
    is formatted with the format spec ``spec`` and prefixed with ``prefix``.
    """
    if value is None:
        return "N/A"
    if isinstance(value, str):
        return value
    try:
        return f"{prefix}{value:{spec}}"
    except (TypeError, ValueError):
        return "N/A"


def _demo_report(size, age, zone, school_rating, beach_distance, flood_zone):
    """Build the demo-mode report from a fixed heuristic (no trained model)."""
    base_price = 400000
    # Size contribution, scaled by the zone multiplier below.
    size_factor = size * 250
    zone_multipliers = {
        'Beachfront': 2.5, 'Waterfront': 2.2, 'Luxury': 2.0,
        'Premium': 1.8, 'Urban-Coastal': 1.6, 'Coastal': 1.5,
        'Downtown': 1.4, 'Brickell': 1.7, 'South Beach': 2.3,
        'Coral Gables': 1.6, 'Urban': 1.2, 'Suburban': 1.0,
        'Residential': 0.9, 'Commercial': 0.8, 'Rural': 0.7,
    }
    # Zones without an entry use a neutral multiplier.
    zone_multiplier = zone_multipliers.get(zone, 1.0)
    # Bonus only within 5 miles of the beach; never negative.
    beach_bonus = max(0, (5 - beach_distance) * 75000)
    age_penalty = age * 3000
    # Ratings below 5 turn this into a penalty.
    school_bonus = (school_rating - 5) * 25000
    flood_penalties = {'X': 0, 'AE': -30000, 'VE': -60000, 'AH': -25000, 'AO': -20000}
    flood_penalty = flood_penalties.get(flood_zone, 0)

    demo_price = (base_price + size_factor * zone_multiplier
                  + beach_bonus + school_bonus + flood_penalty - age_penalty)
    demo_price = max(200000, demo_price)  # price floor
    price_formatted = f"${demo_price:,.0f}"

    # Leading/trailing empty entries reproduce the report's surrounding newlines.
    lines = [
        "",
        "🏠 **PREDICCIÓN DEMO - MIAMIHOMEAI**",
        f"💰 **Precio Estimado: {price_formatted}**",
        "🎯 **Confianza: DEMO**",
        "⚠️ **MODO DEMOSTRACIÓN**",
        "Esta es una predicción simulada para mostrar la funcionalidad.",
        "Para predicciones reales, sube el archivo: `miami_premium_model_v3.joblib`",
        "📊 **Factores considerados en demo:**",
        f"• Tamaño: ${size_factor * zone_multiplier:,.0f}",
        f"• Zona ({zone}): x{zone_multiplier} multiplicador",
        f"• Proximidad playa: ${beach_bonus:,.0f}",
        f"• Calidad escolar: ${school_bonus:,.0f}",
        f"• Flood risk: ${flood_penalty:,.0f}",
        f"• Depreciación edad: ${-age_penalty:,.0f}",
        "🤖 **Sube tu modelo entrenado para predicciones reales con 95.8% precisión**",
        "",
    ]
    return "\n".join(lines)


def predict_price(size, rooms, bathrooms, age, zone, school_rating, beach_distance, flood_zone):
    """Return a text report with the estimated price of a property.

    Uses the module-level ``model`` / ``model_loaded`` / ``metadata`` values:
    with a loaded model the inputs go through ``FeatureEngineer`` and
    ``model.predict``; otherwise a heuristic demo estimate is reported.

    Args:
        size: Property size (reported as ft²).
        rooms: Number of rooms.
        bathrooms: Number of bathrooms.
        age: Age of the property in years.
        zone: Zone label.
        school_rating: School rating (reported out of 10).
        beach_distance: Distance to the beach (reported in miles).
        flood_zone: Flood-zone code.

    Returns:
        The report as a string. Failures are never raised; they are returned
        as an error message so the UI always has something to show.
    """
    try:
        if not (model_loaded and model is not None):
            return _demo_report(size, age, zone, school_rating, beach_distance, flood_zone)

        # Single-row frame using the column names FeatureEngineer reads.
        input_data = pd.DataFrame({
            'Size': [size],
            'Rooms': [rooms],
            'Bathrooms': [bathrooms],
            'Age': [age],
            'Zone': [zone],
            'School_Rating': [school_rating],
            'Beach_Distance': [beach_distance],
            'Flood_Zone': [flood_zone],
        })
        features_df = FeatureEngineer.create_features(input_data)
        print(f"🔍 Debug - Features creadas: {features_df.shape}")
        print(f"🔍 Debug - Columnas: {list(features_df.columns)}")

        try:
            predicted_price = model.predict(features_df)[0]
            confidence = "Alta"
            price_formatted = f"${predicted_price:,.0f}"
            analysis = analyze_property_factors(
                size, rooms, bathrooms, age, zone,
                school_rating, beach_distance, flood_zone, predicted_price
            )
            # Missing or None metrics render as "N/A" instead of a misleading
            # 0.000 / $0 (or a formatting error that would hide the prediction).
            r2_text = _format_metric(metadata.get('r2_score'), '.3f')
            rmse_text = _format_metric(metadata.get('rmse'), ',.0f', prefix='$')

            lines = [
                "",
                "🏠 **PREDICCIÓN DE PRECIO - MIAMIHOMEAI**",
                f"💰 **Precio Estimado: {price_formatted}**",
                f"🎯 **Confianza: {confidence}**",
                "📊 **ANÁLISIS DE FACTORES:**",
                analysis,
                "📋 **DETALLES DE LA PROPIEDAD:**",
                f"• 📐 Tamaño: {size:,} ft²",
                f"• 🏠 Habitaciones: {rooms}",
                f"• 🚿 Baños: {bathrooms}",
                f"• 📅 Edad: {age} años",
                f"• 🌍 Zona: {zone}",
                f"• 🎓 Rating Escolar: {school_rating}/10",
                f"• 🏖️ Distancia a Playa: {beach_distance} millas",
                f"• 🌊 Zona de Inundación: {flood_zone}",
                f"🤖 **Modelo:** {metadata.get('model_name', 'Miami Premium Model v3')}",
                f"📈 **R²:** {r2_text}",
                f"🎯 **RMSE:** {rmse_text}",
                "",
            ]
            return "\n".join(lines)
        except Exception as e:
            # UI boundary: report the prediction failure along with the feature shape.
            return f"❌ Error en predicción: {str(e)}\n🔍 Debug info: {features_df.shape}"
    except Exception as e:
        # UI boundary: any other failure becomes a message instead of a crash.
        return f"❌ Error procesando datos: {str(e)}"
def analyze_property_factors(size, rooms, bathrooms, age, zone, school_rating, beach_distance, flood_zone, price):
    """Describe the factors that characterise a property and its price.

    Each section contributes at most one note, chosen by fixed thresholds.

    Args:
        size: Property size (ft²). Price-per-ft² analysis is skipped when
            this is not positive.
        rooms: Number of rooms. Layout analysis is skipped when this is not
            positive.
        bathrooms: Number of bathrooms (accepted for interface symmetry; not
            used by any rule).
        age: Age in years.
        zone: Zone label; rules match on substrings of it.
        school_rating: School rating.
        beach_distance: Distance to the beach.
        flood_zone: Flood-zone code.
        price: Price used for the price-per-ft² rule.

    Returns:
        The notes joined by newlines, each prefixed with ``"• "``. When no
        rule fires, a single "standard property" note is returned.
    """
    factors = []

    # Price per square foot (guarded: a zero size would divide by zero).
    if size > 0:
        price_per_sqft = price / size
        if price_per_sqft > 500:
            factors.append("💎 **Precio premium** - $500+ por pie cuadrado")
        elif price_per_sqft > 300:
            factors.append("💰 **Precio alto** - $300-500 por pie cuadrado")
        elif price_per_sqft < 200:
            factors.append("💵 **Precio accesible** - Menos de $200 por pie cuadrado")

    # Size
    if size > 3500:
        factors.append("🏰 **Mansión** - Propiedad de gran tamaño (+3,500 ft²)")
    elif size > 2500:
        factors.append("🏘️ **Casa grande** - Amplio espacio familiar (2,500+ ft²)")
    elif size < 1000:
        factors.append("🏠 **Propiedad compacta** - Ideal para solteros/parejas")

    # Location (first matching rule wins)
    if 'Beachfront' in zone or 'Waterfront' in zone:
        factors.append("🏖️ **Frente al agua** - Ubicación premium con vistas directas")
    elif 'Urban-Coastal' in zone:
        factors.append("🌊 **Urban-Coastal** - Perfecto balance urbano-costero")
    elif 'Brickell' in zone:
        factors.append("🏙️ **Brickell** - Centro financiero de Miami")
    elif 'South Beach' in zone:
        factors.append("🎉 **South Beach** - Zona icónica de entretenimiento")
    elif 'Coral Gables' in zone or 'Hammock Lakes' in zone:
        factors.append("🌺 **Coral Gables** - Elegante vecindario residencial")
    elif 'Luxury' in zone or 'Premium' in zone:
        factors.append("💎 **Zona de lujo** - Área exclusiva de alta gama")

    # Beach proximity
    if beach_distance < 0.5:
        factors.append("🏄 **A pasos de la playa** - Acceso inmediato al océano")
    elif beach_distance < 1.5:
        factors.append("🚶 **Muy cerca de la playa** - Caminata corta al océano")
    elif beach_distance < 3:
        factors.append("🚗 **Cerca de la playa** - 5-10 min en auto")
    elif beach_distance > 6:
        factors.append("🏡 **Zona interior** - Precio más moderado, lejos de la costa")

    # Age and condition
    if age < 3:
        factors.append("✨ **Construcción nueva** - Sin depreciación, tecnología moderna")
    elif age < 10:
        factors.append("🏗️ **Propiedad reciente** - Excelente condición")
    elif 20 <= age <= 25:
        factors.append("🔧 **Era de renovación** - Posible actualización reciente")
    elif age > 40:
        factors.append("🏛️ **Propiedad histórica** - Carácter único, posible renovación")

    # Schools
    if school_rating >= 9:
        factors.append("🎓 **Escuelas excepcionales** - Top 10% del distrito")
    elif school_rating >= 8:
        factors.append("📚 **Excelentes escuelas** - Muy atractivo para familias")
    elif school_rating >= 7:
        factors.append("🎒 **Buenas escuelas** - Sólida educación local")
    elif school_rating < 5:
        factors.append("📖 **Escuelas básicas** - Consideración importante para familias")

    # Flood risk. AH/AO share the AE note: the feature mapping in this file
    # scores all three at the same (medium) risk level.
    if flood_zone == 'X':
        factors.append("☀️ **Bajo riesgo de inundación** - Sin seguro obligatorio")
    elif flood_zone in ('AE', 'AH', 'AO'):
        factors.append("🌧️ **Riesgo moderado** - Seguro de inundación requerido")
    elif flood_zone == 'VE':
        factors.append("🌊 **Alto riesgo costero** - Zona de olas, seguro caro")

    # Space distribution (skipped without rooms: there is no per-room area).
    if rooms > 0:
        sqft_per_room = size / rooms
        if sqft_per_room > 800:
            factors.append("🛏️ **Habitaciones amplias** - Espacios generosos")
        elif sqft_per_room < 400:
            factors.append("🏠 **Distribución compacta** - Uso eficiente del espacio")

    if not factors:
        factors.append("📊 **Propiedad estándar** - Características promedio del mercado")
    return "\n".join(f"• {factor}" for factor in factors)
print("✅ MiamiHomeAI listo!")

# Build the Gradio interface. The inputs are listed in the same order as
# the parameters of predict_price.
interface = gr.Interface(
    fn=predict_price,
    inputs=[
        gr.Slider(
            minimum=500,
            maximum=10000,
            value=2000,
            step=50,
            label="🏠 Tamaño (ft²)",
            info="Superficie total de la propiedad",
        ),
        gr.Slider(
            minimum=1,
            maximum=10,
            value=3,
            step=1,
            label="🛏️ Habitaciones",
            info="Número total de habitaciones",
        ),
        gr.Slider(
            minimum=1,
            maximum=8,
            value=2,
            step=0.5,
            label="🚿 Baños",
            info="Número de baños completos y medios",
        ),
        gr.Slider(
            minimum=0,
            maximum=50,
            value=10,
            step=1,
            label="📅 Edad (años)",
            info="Años desde la construcción",
        ),
        gr.Dropdown(
            choices=ZONES,
            value="Urban-Coastal",
            label="🌍 Zona",
            info="Ubicación geográfica de la propiedad",
        ),
        gr.Slider(
            minimum=1,
            maximum=10,
            value=7,
            step=0.1,
            label="🎓 Rating Escolar",
            info="Calificación promedio de escuelas cercanas (1-10)",
        ),
        gr.Slider(
            minimum=0,
            maximum=20,
            value=2.5,
            step=0.1,
            label="🏖️ Distancia a Playa (millas)",
            info="Distancia a la playa más cercana",
        ),
        gr.Dropdown(
            choices=FLOOD_ZONES,
            value="AE",
            label="🌊 Zona de Inundación",
            info="Clasificación FEMA de riesgo de inundación",
        ),
    ],
    outputs=gr.Textbox(
        label="💰 Predicción de Precio",
        lines=20,
        show_copy_button=True,
    ),
    title="🏠 MiamiHomeAI - Predictor de Precios Inmobiliarios",
    description="""
**🎯 Inteligencia Artificial para predicción de precios inmobiliarios en Miami**
Obtén estimaciones precisas del valor de propiedades basadas en:
📐 Características físicas • 🌍 Ubicación geográfica • 🎓 Calidad educativa • 🏖️ Proximidad costera
**🚀 Modelo entrenado con R² = 95.8% usando XGBoost avanzado**
""",
    article="""
### 🏖️ Sobre el Mercado Inmobiliario de Miami
**🌊 Factores Clave del Precio:**
- **Proximidad a la playa**: Las propiedades costeras pueden valer 2-3x más
- **Zona de ubicación**: Áreas como South Beach, Brickell, y Coral Gables son premium
- **Riesgo de inundación**: Factor crítico en una ciudad costera
- **Calidad escolar**: Influye significativamente en el valor residencial
**📊 Características del Modelo:**
- **Algoritmos**: XGBoost (ganador), LightGBM, Random Forest, Stacking
- **Feature Engineering**: 38 características derivadas automáticamente
- **Precisión**: R² = 95.8%, RMSE optimizado
- **Datos**: 3000+ propiedades de Miami con validación estratificada
**🎯 Casos de Uso:**
- **🏡 Compradores**: Evaluar si un precio es justo
- **🏢 Agentes**: Pricing automático de propiedades
- **💼 Inversores**: Análisis de oportunidades de inversión
- **🏦 Bancos**: Evaluación para préstamos hipotecarios
### 🔧 Tecnología
**ML:** XGBoost, Scikit-learn • **UI:** Gradio • **Deploy:** Hugging Face Spaces
---
**🏠 Desarrollado con ❤️ para el mercado inmobiliario de Miami**
""",
    # Each example row follows the input order above.
    examples=[
        [2500, 3, 2, 5, "Beachfront", 8.5, 0.5, "X"],        # coastal luxury
        [1800, 2, 2, 15, "Urban-Coastal", 7.0, 2.5, "AE"],   # typical Edgewater
        [4000, 4, 3, 2, "Luxury", 9.0, 0.2, "X"],            # new mansion
        [1200, 1, 1, 25, "Suburban", 6.0, 8.0, "VE"],        # budget inland
        [3200, 3, 2.5, 8, "Brickell", 8.0, 3.0, "AE"],       # premium Brickell
        [2800, 4, 3, 12, "Coral Gables", 9.5, 4.0, "X"],     # family Coral Gables
    ],
    cache_examples=False,
    theme="default",
)

# Launch the application when run as a script.
if __name__ == "__main__":
    print("🌐 Lanzando MiamiHomeAI...")
    interface.launch()
    print("🎉 ¡MiamiHomeAI lanzado exitosamente!")