# app.py
# NYC StayWise - Airbnb Price Predictor (100% Fixed & Final)
# Zero Errors • Self-Contained • Beautiful
import warnings

import numpy as np
import pandas as pd
import streamlit as st
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

warnings.filterwarnings("ignore")

# ------------------ Page Config ------------------
st.set_page_config(
    page_title="NYC StayWise • Price Predictor",
    page_icon="City",
    layout="centered",
)

# ------------------ Design ------------------
# NOTE(review): this call passes unsafe_allow_html=True but the string holds
# only whitespace -- presumably a <style> block was lost; confirm against the
# original file.
st.markdown(""" """, unsafe_allow_html=True)


# ------------------ Generate Synthetic Data (FIXED: All floats) ------------------
@st.cache_data
def generate_airbnb_data(n_samples: int = 10000) -> pd.DataFrame:
    """Build a reproducible synthetic table of NYC Airbnb-style listings.

    The global NumPy random state is seeded with 42, so the same
    ``n_samples`` always produces the same frame.

    Price model (before noise and clipping):
      * base 150 for Manhattan, 80 for every other borough;
      * +100 for an entire home, +50 for a private room, +20 for a shared room;
      * +15 per guest, +40 per bedroom, +20 per bed, +30 per bathroom.
    Gaussian noise (mean 0, std 50) is added, then the result is clipped to
    [30, 1000] and truncated to ``int``.

    Args:
        n_samples: Number of listings (rows) to generate.

    Returns:
        DataFrame with columns ``room_type``, ``accommodates``, ``bathrooms``,
        ``bedrooms``, ``beds``, ``neighbourhood_group_cleansed``,
        ``property_type``, ``cleaning_fee``, ``instant_bookable`` and ``price``.
    """
    np.random.seed(42)

    # Helper: draw n_samples values, normalising the weights so they sum to 1
    # (np.random.choice rejects probabilities that do not).
    def choice(options, probs=None):
        if probs is not None:
            probs = np.array(probs) / np.sum(probs)
        return np.random.choice(options, n_samples, p=probs)

    # Categorical
    # The order of the random draws below is significant: changing it changes
    # the generated data for the fixed seed.
    neighborhood = choice(
        ['Manhattan', 'Brooklyn', 'Queens', 'Bronx', 'Staten Island'],
        [0.4, 0.35, 0.15, 0.08, 0.02],
    )
    room_type = choice(
        ['Entire home/apt', 'Private room', 'Shared room'],
        [0.6, 0.35, 0.05],
    )
    property_type = choice(
        ['Apartment', 'House', 'Condominium', 'Loft', 'Townhouse', 'Other'],
        [0.7, 0.1, 0.1, 0.05, 0.03, 0.02],
    )

    # Numeric — ALL AS FLOAT from the beginning!
    accommodates = choice(
        [1, 2, 3, 4, 5, 6, 8, 10, 16],
        [0.1, 0.2, 0.2, 0.25, 0.15, 0.08, 0.02, 0.01, 0.01],
    ).astype(float)
    bedrooms = choice(
        [0, 1, 2, 3, 4, 5, 6],
        [0.1, 0.4, 0.3, 0.15, 0.04, 0.008, 0.002],
    ).astype(float)
    beds = choice(
        [1, 2, 3, 4, 5, 6, 8, 10],
        [0.3, 0.3, 0.2, 0.1, 0.05, 0.03, 0.01, 0.01],
    ).astype(float)
    bathrooms = np.round(np.random.uniform(0.5, 4.0, n_samples), 1)

    # Price calculation (now safe with floats)
    price = np.where(neighborhood == 'Manhattan', 150.0, 80.0)
    price += np.where(
        room_type == 'Entire home/apt',
        100.0,
        np.where(room_type == 'Private room', 50.0, 20.0),
    )
    price += accommodates * 15 + bedrooms * 40 + beds * 20 + bathrooms * 30
    price += np.random.normal(0, 50, n_samples)
    price = np.clip(price, 30, 1000).astype(int)

    df = pd.DataFrame({
        'room_type': room_type,
        'accommodates': accommodates,
        'bathrooms': bathrooms,
        'bedrooms': bedrooms,
        'beds': beds,
        'neighbourhood_group_cleansed': neighborhood,
        'property_type': property_type,
        'cleaning_fee': choice([True, False], [0.7, 0.3]),
        'instant_bookable': choice([True, False], [0.6, 0.4]),
        'price': price,
    })
    return df


df = generate_airbnb_data()

# NOTE(review): the strings below are rendered with unsafe_allow_html=True but
# contain only text and newlines -- the surrounding HTML tags appear to have
# been lost; confirm against the original file. Text content is kept verbatim.
st.markdown("\n\nCodeluxe\n\n", unsafe_allow_html=True)
st.markdown("\n\nNYC StayWise\n\n", unsafe_allow_html=True)
st.markdown(
    "\n\nFind the perfect price for your NYC Airbnb\n\n",
    unsafe_allow_html=True,
)

# Stats
c1, c2, c3, c4 = st.columns(4)
c1.metric("Listings", f"{len(df):,}")
c2.metric("Avg Price", f"${df.price.mean():.0f}")
c3.metric("Cheapest", f"${df.price.min()}")
c4.metric("Luxury", f"${df.price.max():,}")

# ------------------ Model ------------------
X = df.drop('price', axis=1)
y = df['price']

# One-hot encode the categorical columns; the resulting column list is the
# schema every prediction sample is re-indexed to.
X_encoded = pd.get_dummies(
    X,
    columns=['room_type', 'neighbourhood_group_cleansed', 'property_type'],
    drop_first=False,
)
TRAIN_COLUMNS = X_encoded.columns.tolist()

num_cols = ['accommodates', 'bathrooms', 'bedrooms', 'beds']

X_train, X_test, y_train, y_test = train_test_split(
    X_encoded, y, test_size=0.2, random_state=42
)
# Work on explicit copies so the column assignments below never write into a
# view of X_encoded.
X_train = X_train.copy()
X_test = X_test.copy()

# Fit the scaler on the training rows only. Fitting it on the full dataset
# before the split would leak test-set statistics into preprocessing and make
# the reported MAE / R² optimistic.
scaler = StandardScaler()
X_train[num_cols] = scaler.fit_transform(X_train[num_cols])
X_test[num_cols] = scaler.transform(X_test[num_cols])


@st.cache_resource
def get_model():
    """Train the random-forest price model once and cache it across reruns.

    Reads the module-level ``X_train`` / ``y_train``; because the function
    takes no arguments, Streamlit reuses the single cached model.

    Returns:
        The fitted ``RandomForestRegressor``.
    """
    model = RandomForestRegressor(
        n_estimators=300, max_depth=20, random_state=42, n_jobs=-1
    )
    model.fit(X_train, y_train)
    return model


model = get_model()
pred = model.predict(X_test)
st.success(
    f"Model → MAE ${mean_absolute_error(y_test,pred):.0f} | "
    f"R² {r2_score(y_test,pred):.3f}"
)

# ------------------ Prediction ------------------
st.markdown("\n", unsafe_allow_html=True)
st.subheader("Your Listing Details")

col1, col2 = st.columns(2)
with col1:
    room = st.selectbox(
        "Room Type", ["Entire home/apt", "Private room", "Shared room"]
    )
    area = st.selectbox(
        "Area", ["Manhattan", "Brooklyn", "Queens", "Bronx", "Staten Island"]
    )
    guests = st.slider("Guests", 1, 16, 2)
    beds = st.slider("Beds", 1, 10, 1)
with col2:
    bedrooms = st.slider("Bedrooms", 0, 6, 1)
    bathrooms = st.slider("Bathrooms", 0.5, 4.0, 1.0, 0.5)
    cleaning = st.checkbox("Cleaning Fee")
    instant = st.checkbox("Instant Book")
    # NOTE(review): placement of this select box inside col2 is assumed; the
    # original indentation was lost -- confirm the intended layout.
    prop = st.selectbox(
        "Property",
        ["Apartment", "House", "Condominium", "Loft", "Townhouse", "Other"],
    )

if st.button("Calculate Price", use_container_width=True):
    inp = {
        'accommodates': float(guests),
        'bedrooms': float(bedrooms),
        'beds': float(beds),
        'bathrooms': float(bathrooms),
        'cleaning_fee': int(cleaning),
        'instant_bookable': int(instant),
        'room_type': room,
        'neighbourhood_group_cleansed': area,
        'property_type': prop,
    }
    sample = pd.get_dummies(
        pd.DataFrame([inp]),
        columns=['room_type', 'neighbourhood_group_cleansed', 'property_type'],
    )
    # A single row only yields dummy columns for the categories it contains;
    # re-index to the training schema so the absent ones are filled with 0.
    sample = sample.reindex(columns=TRAIN_COLUMNS, fill_value=0)
    sample[num_cols] = scaler.transform(sample[num_cols])

    price = model.predict(sample)[0]

    st.markdown(f"\n${price:.0f}\n", unsafe_allow_html=True)
    st.markdown("\n\nRecommended Nightly Rate\n\n", unsafe_allow_html=True)

    if price > 350:
        st.warning("Premium Luxury Tier")
    elif price > 180:
        st.info("Great Value – High Demand")
    else:
        st.success("Budget-Friendly")

st.markdown("\n", unsafe_allow_html=True)
st.caption("NYC StayWise • 100% Original • Synthetic Data • December 2025")