# app.py # NYC StayWise - Airbnb Price Predictor (100% Fixed & Final) # Zero Errors • Self-Contained • Beautiful import streamlit as st import pandas as pd import numpy as np from sklearn.model_selection import train_test_split from sklearn.preprocessing import StandardScaler from sklearn.ensemble import RandomForestRegressor from sklearn.metrics import mean_absolute_error, r2_score import warnings warnings.filterwarnings("ignore") # ------------------ Page Config ------------------ st.set_page_config(page_title="NYC StayWise • Price Predictor", page_icon="City", layout="centered") # ------------------ Design ------------------ st.markdown(""" """, unsafe_allow_html=True) # ------------------ Generate Synthetic Data (FIXED: All floats) ------------------ @st.cache_data def generate_airbnb_data(n_samples=10000): np.random.seed(42) # Helper: normalized probabilities def choice(options, probs=None): if probs is not None: probs = np.array(probs) / np.sum(probs) return np.random.choice(options, n_samples, p=probs) # Categorical neighborhood = choice(['Manhattan','Brooklyn','Queens','Bronx','Staten Island'], [0.4,0.35,0.15,0.08,0.02]) room_type = choice(['Entire home/apt','Private room','Shared room'], [0.6,0.35,0.05]) property_type = choice(['Apartment','House','Condominium','Loft','Townhouse','Other'], [0.7,0.1,0.1,0.05,0.03,0.02]) # Numeric — ALL AS FLOAT from the beginning! accommodates = choice([1,2,3,4,5,6,8,10,16], [0.1,0.2,0.2,0.25,0.15,0.08,0.02,0.01,0.01]).astype(float) bedrooms = choice([0,1,2,3,4,5,6], [0.1,0.4,0.3,0.15,0.04,0.008,0.002]).astype(float) beds = choice([1,2,3,4,5,6,8,10], [0.3,0.3,0.2,0.1,0.05,0.03,0.01,0.01]).astype(float) bathrooms = np.round(np.random.uniform(0.5, 4.0, n_samples), 1) # Price calculation (now safe with floats) price = np.where(neighborhood == 'Manhattan', 150.0, 80.0) price += np.where(room_type == 'Entire home/apt', 100.0, np.where(room_type == 'Private room', 50.0, 20.0)) price += accommodates * 15 + bedrooms * 40 + beds * 20 + bathrooms * 30 price += np.random.normal(0, 50, n_samples) price = np.clip(price, 30, 1000).astype(int) df = pd.DataFrame({ 'room_type': room_type, 'accommodates': accommodates, 'bathrooms': bathrooms, 'bedrooms': bedrooms, 'beds': beds, 'neighbourhood_group_cleansed': neighborhood, 'property_type': property_type, 'cleaning_fee': choice([True, False], [0.7, 0.3]), 'instant_bookable': choice([True, False], [0.6, 0.4]), 'price': price }) return df df = generate_airbnb_data() st.markdown("
Find the perfect price for your NYC Airbnb
", unsafe_allow_html=True) # Stats c1,c2,c3,c4 = st.columns(4) c1.metric("Listings", f"{len(df):,}") c2.metric("Avg Price", f"${df.price.mean():.0f}") c3.metric("Cheapest", f"${df.price.min()}") c4.metric("Luxury", f"${df.price.max():,}") # ------------------ Model ------------------ X = df.drop('price', axis=1) y = df['price'] X_encoded = pd.get_dummies(X, columns=['room_type','neighbourhood_group_cleansed','property_type'], drop_first=False) TRAIN_COLUMNS = X_encoded.columns.tolist() scaler = StandardScaler() num_cols = ['accommodates','bathrooms','bedrooms','beds'] X_encoded[num_cols] = scaler.fit_transform(X_encoded[num_cols]) X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42) @st.cache_resource def get_model(): model = RandomForestRegressor(n_estimators=300, max_depth=20, random_state=42, n_jobs=-1) model.fit(X_train, y_train) return model model = get_model() pred = model.predict(X_test) st.success(f"Model → MAE ${mean_absolute_error(y_test,pred):.0f} | R² {r2_score(y_test,pred):.3f}") # ------------------ Prediction ------------------ st.markdown("