# Airnab / src/streamlit_app.py
# Man0707's picture
# Update src/streamlit_app.py
# 76e525c verified
# app.py
# NYC StayWise - Airbnb Price Predictor (100% Fixed & Final)
# Zero Errors • Self-Contained • Beautiful
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
import warnings
warnings.filterwarnings("ignore")
# ------------------ Page Config ------------------
# page_icon must be an emoji, an emoji shortcode, "random", a Material icon or
# an image. The previous value was the bare word "City", which is none of
# those, so the favicon could not resolve. Use an actual city emoji instead.
st.set_page_config(
    page_title="NYC StayWise • Price Predictor",
    page_icon="🌆",
    layout="centered",
)

# ------------------ Design ------------------
# Global stylesheet injected as raw HTML: dark gradient background, gradient
# <h1> headings, the large green `.price` readout used for the prediction,
# and pill-shaped gradient buttons.
# NOTE(review): the prediction section further down emits a
# <div class='card'>, but no `.card` rule is defined here.
st.markdown("""
<style>
.main {background:#0a0e17; color:#e0e0e0;}
.stApp {background:linear-gradient(135deg,#1a1a2e,#16213e);}
h1 {font-size:4.2rem; text-align:center;
background:linear-gradient(90deg,#00d4ff,#ff00c8,#ffd700);
-webkit-background-clip:text; -webkit-text-fill-color:transparent;}
.price {color:#00ff9d; font-size:4.5rem; text-align:center; font-weight:bold;}
.stButton>button {background:linear-gradient(45deg,#00d4ff,#ff00c8);
color:white; border:none; border-radius:50px; padding:1rem 3rem; font-size:1.4rem;}
</style>
""", unsafe_allow_html=True)
# ------------------ Generate Synthetic Data (FIXED: All floats) ------------------
@st.cache_data
def generate_airbnb_data(n_samples: int = 10000, seed: int = 42) -> pd.DataFrame:
    """Build a reproducible synthetic table of NYC Airbnb listings.

    Categorical and numeric attributes are sampled independently of each
    other. The nightly price is then derived from them:

    * base of 150 for Manhattan, 80 for every other borough;
    * plus 100 / 50 / 20 for entire home / private room / shared room;
    * plus 15 per guest, 40 per bedroom, 20 per bed, 30 per bathroom;
    * plus Gaussian noise (mean 0, std 50);
    * clipped to the range 30..1000 and truncated to an integer.

    ``cleaning_fee`` and ``instant_bookable`` are sampled but do not feed
    into the price.

    Args:
        n_samples: Number of listings (rows) to generate.
        seed: Seed for the private random generator. The default of 42
            reproduces the data set this function has always returned.

    Returns:
        A DataFrame with the columns ``room_type``, ``accommodates``,
        ``bathrooms``, ``bedrooms``, ``beds``,
        ``neighbourhood_group_cleansed``, ``property_type``,
        ``cleaning_fee``, ``instant_bookable`` and ``price``.
    """
    # Use a private generator instead of np.random.seed(): reseeding the
    # global RNG would silently change the random stream of every other
    # np.random caller in the process. RandomState(seed) yields the same
    # sequence the legacy global seeding did, so the generated data is
    # unchanged.
    rng = np.random.RandomState(seed)

    def choice(options, probs=None):
        """Draw ``n_samples`` values from ``options``; weights are normalised."""
        if probs is not None:
            # Some weight lists below do not sum to exactly 1.
            probs = np.array(probs) / np.sum(probs)
        return rng.choice(options, n_samples, p=probs)

    # NOTE: the order of the draws below determines the generated values;
    # keep it stable so a given seed keeps producing the same table.

    # Categorical attributes
    neighborhood = choice(
        ['Manhattan', 'Brooklyn', 'Queens', 'Bronx', 'Staten Island'],
        [0.4, 0.35, 0.15, 0.08, 0.02],
    )
    room_type = choice(
        ['Entire home/apt', 'Private room', 'Shared room'],
        [0.6, 0.35, 0.05],
    )
    property_type = choice(
        ['Apartment', 'House', 'Condominium', 'Loft', 'Townhouse', 'Other'],
        [0.7, 0.1, 0.1, 0.05, 0.03, 0.02],
    )

    # Numeric attributes, stored as floats so the in-place price arithmetic
    # below never has to cast an integer array.
    accommodates = choice(
        [1, 2, 3, 4, 5, 6, 8, 10, 16],
        [0.1, 0.2, 0.2, 0.25, 0.15, 0.08, 0.02, 0.01, 0.01],
    ).astype(float)
    bedrooms = choice(
        [0, 1, 2, 3, 4, 5, 6],
        [0.1, 0.4, 0.3, 0.15, 0.04, 0.008, 0.002],
    ).astype(float)
    beds = choice(
        [1, 2, 3, 4, 5, 6, 8, 10],
        [0.3, 0.3, 0.2, 0.1, 0.05, 0.03, 0.01, 0.01],
    ).astype(float)
    bathrooms = np.round(rng.uniform(0.5, 4.0, n_samples), 1)

    # Price model: borough base + room-type premium + size terms + noise.
    price = np.where(neighborhood == 'Manhattan', 150.0, 80.0)
    price += np.where(room_type == 'Entire home/apt', 100.0,
                      np.where(room_type == 'Private room', 50.0, 20.0))
    price += accommodates * 15 + bedrooms * 40 + beds * 20 + bathrooms * 30
    price += rng.normal(0, 50, n_samples)
    price = np.clip(price, 30, 1000).astype(int)

    # Boolean flags are drawn last, matching the original draw order.
    cleaning_fee = choice([True, False], [0.7, 0.3])
    instant_bookable = choice([True, False], [0.6, 0.4])

    return pd.DataFrame({
        'room_type': room_type,
        'accommodates': accommodates,
        'bathrooms': bathrooms,
        'bedrooms': bedrooms,
        'beds': beds,
        'neighbourhood_group_cleansed': neighborhood,
        'property_type': property_type,
        'cleaning_fee': cleaning_fee,
        'instant_bookable': instant_bookable,
        'price': price,
    })
# Synthetic listings table that the rest of the page works from.
df = generate_airbnb_data()

# Hero section: two gradient headings followed by the tagline.
for _heading in ("Codeluxe", "NYC StayWise"):
    st.markdown(f"<h1>{_heading}</h1>", unsafe_allow_html=True)
st.markdown(
    "<p style='text-align:center;font-size:1.8rem;color:#88ddff;'>"
    "Find the perfect price for your NYC Airbnb</p>",
    unsafe_allow_html=True,
)

# Stats: four headline figures laid out side by side.
c1, c2, c3, c4 = st.columns(4)
_prices = df["price"]
_headline_stats = (
    (c1, "Listings", f"{len(df):,}"),
    (c2, "Avg Price", f"${_prices.mean():.0f}"),
    (c3, "Cheapest", f"${_prices.min()}"),
    (c4, "Luxury", f"${_prices.max():,}"),
)
for _column, _label, _value in _headline_stats:
    _column.metric(_label, _value)
# ------------------ Model ------------------
# Features are every column except the target; the three categorical columns
# are one-hot encoded, keeping all levels (drop_first=False).
X = df.drop('price', axis=1)
y = df['price']
X_encoded = pd.get_dummies(
    X,
    columns=['room_type', 'neighbourhood_group_cleansed', 'property_type'],
    drop_first=False,
)
# Column layout the model is trained on; single-row prediction inputs are
# re-indexed to this list so they line up with the training matrix.
TRAIN_COLUMNS = X_encoded.columns.tolist()
num_cols = ['accommodates', 'bathrooms', 'bedrooms', 'beds']

# Split BEFORE fitting the scaler. Fitting it on the full table would let the
# held-out rows influence the preprocessing statistics (data leakage) behind
# the reported MAE / R².
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded, y, test_size=0.2, random_state=42
)
# Work on explicit copies so the column assignments below never write into a
# view of X_encoded.
X_train = X_train.copy()
X_test = X_test.copy()

scaler = StandardScaler()
X_train[num_cols] = scaler.fit_transform(X_train[num_cols])  # learn mean/std from training rows only
X_test[num_cols] = scaler.transform(X_test[num_cols])        # reuse the training statistics


@st.cache_resource
def get_model():
    """Fit and return the random-forest price model (cached via st.cache_resource).

    Reads the module-level ``X_train`` / ``y_train`` split; takes no
    arguments.
    """
    model = RandomForestRegressor(n_estimators=300, max_depth=20, random_state=42, n_jobs=-1)
    model.fit(X_train, y_train)
    return model


model = get_model()

# Hold-out evaluation shown as a banner at the top of the page.
pred = model.predict(X_test)
st.success(f"Model → MAE ${mean_absolute_error(y_test,pred):.0f} | R² {r2_score(y_test,pred):.3f}")
# ------------------ Prediction ------------------
# NOTE(review): this <div> is opened here and closed in a separate
# st.markdown call at the end of the section. Whether it actually wraps the
# widgets in between depends on how Streamlit renders separate markdown
# elements; confirm in the browser.
st.markdown("<div class='card'>", unsafe_allow_html=True)
st.subheader("Your Listing Details")

# Listing attributes are collected in a two-column form.
# NOTE(review): the source's indentation was lost; the widgets after
# "Bedrooms" (including the property selector) are assumed to belong to the
# right-hand column.
col1, col2 = st.columns(2)
with col1:
    room = st.selectbox(
        "Room Type",
        options=["Entire home/apt", "Private room", "Shared room"],
    )
    area = st.selectbox(
        "Area",
        options=["Manhattan", "Brooklyn", "Queens", "Bronx", "Staten Island"],
    )
    guests = st.slider("Guests", min_value=1, max_value=16, value=2)
    beds = st.slider("Beds", min_value=1, max_value=10, value=1)
with col2:
    bedrooms = st.slider("Bedrooms", min_value=0, max_value=6, value=1)
    bathrooms = st.slider("Bathrooms", min_value=0.5, max_value=4.0, value=1.0, step=0.5)
    cleaning = st.checkbox("Cleaning Fee")
    instant = st.checkbox("Instant Book")
    prop = st.selectbox(
        "Property",
        options=["Apartment", "House", "Condominium", "Loft", "Townhouse", "Other"],
    )

if st.button("Calculate Price", use_container_width=True):
    # One-row record using the same column names as the training frame.
    inp = dict(
        accommodates=float(guests),
        bedrooms=float(bedrooms),
        beds=float(beds),
        bathrooms=float(bathrooms),
        cleaning_fee=int(cleaning),
        instant_bookable=int(instant),
        room_type=room,
        neighbourhood_group_cleansed=area,
        property_type=prop,
    )
    one_row = pd.DataFrame([inp])

    # One-hot encode the single row, then align it to the training layout;
    # dummy columns for categories the user did not pick are filled with 0.
    sample = pd.get_dummies(
        one_row,
        columns=['room_type', 'neighbourhood_group_cleansed', 'property_type'],
    )
    sample = sample.reindex(columns=TRAIN_COLUMNS, fill_value=0)

    # Apply the scaler that was fitted during training to the numeric columns.
    sample[num_cols] = scaler.transform(sample[num_cols])

    price = model.predict(sample)[0]

    st.markdown(f"<div class='price'>${price:.0f}</div>", unsafe_allow_html=True)
    st.markdown(
        "<h3 style='text-align:center;color:#88ffdd;'>Recommended Nightly Rate</h3>",
        unsafe_allow_html=True,
    )

    # Price tier banner: above 350, above 180, otherwise budget.
    if price > 350:
        st.warning("Premium Luxury Tier")
    elif price > 180:
        st.info("Great Value – High Demand")
    else:
        st.success("Budget-Friendly")

st.markdown("</div>", unsafe_allow_html=True)

# Footer
st.caption("NYC StayWise • 100% Original • Synthetic Data • December 2025")