Man0707 committed on
Commit
4161ae5
·
verified ·
1 Parent(s): 9e2ad77

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +27 -22
src/streamlit_app.py CHANGED
@@ -1,6 +1,6 @@
1
  # app.py
2
- # NYC StayWise - Airbnb Price Predictor (Fixed: Synthetic Data)
3
- # 100% Self-Contained • No External URLs • Deploy-Ready
4
 
5
  import streamlit as st
6
  import pandas as pd
@@ -61,35 +61,40 @@ st.markdown("""
61
  </style>
62
  """, unsafe_allow_html=True)
63
 
64
- # ------------------ Generate Synthetic Airbnb Data (Self-Contained) ------------------
65
  @st.cache_data
66
  def generate_airbnb_data(n_samples=10000):
67
  np.random.seed(42) # Reproducible
68
 
69
- # Realistic NYC neighborhoods and properties
70
- neighborhoods = np.random.choice([
71
- 'Manhattan', 'Brooklyn', 'Queens', 'Bronx', 'Staten Island'
72
- ], n_samples, p=[0.4, 0.35, 0.15, 0.08, 0.02])
 
 
73
 
74
- room_types = np.random.choice([
75
- 'Entire home/apt', 'Private room', 'Shared room'
76
- ], n_samples, p=[0.6, 0.35, 0.05])
77
 
78
- property_types = np.random.choice([
79
- 'Apartment', 'House', 'Condominium', 'Loft', 'Townhouse', 'Other'
80
- ], n_samples, p=[0.7, 0.1, 0.1, 0.05, 0.03, 0.02])
81
 
82
- # Numeric features with realistic ranges
83
- accommodates = np.random.choice([1, 2, 3, 4, 5, 6, 8, 10, 16], n_samples, p=[0.1, 0.2, 0.2, 0.25, 0.15, 0.08, 0.02, 0.01, 0.01])
84
- bathrooms = np.random.uniform(0.5, 4.0, n_samples).round(1)
85
- bedrooms = np.random.choice([0, 1, 2, 3, 4, 5, 6], n_samples, p=[0.1, 0.4, 0.3, 0.15, 0.04, 0.008, 0.002])
86
- beds = np.random.choice([1, 2, 3, 4, 5, 6, 8, 10], n_samples, p=[0.3, 0.3, 0.2, 0.1, 0.05, 0.03, 0.01, 0.01])
87
 
88
- # Price generation: Base on features (Manhattan + entire home = premium)
 
 
 
 
 
 
 
 
89
  base_price = np.where(neighborhoods == 'Manhattan', 150, 80)
90
  base_price += np.where(room_types == 'Entire home/apt', 100, np.where(room_types == 'Private room', 50, 20))
91
  base_price += accommodates * 15 + bedrooms * 40 + beds * 20 + bathrooms * 30
92
- base_price += np.random.normal(0, 50, n_samples) # Noise for realism
93
  price = np.clip(base_price, 20, 1000).round(0).astype(int)
94
 
95
  df = pd.DataFrame({
@@ -100,8 +105,8 @@ def generate_airbnb_data(n_samples=10000):
100
  'beds': beds,
101
  'neighbourhood_group_cleansed': neighborhoods,
102
  'property_type': property_types,
103
- 'cleaning_fee': np.random.choice([True, False], n_samples, p=[0.7, 0.3]),
104
- 'instant_bookable': np.random.choice([True, False], n_samples, p=[0.6, 0.4]),
105
  'price': price
106
  })
107
 
 
1
  # app.py
2
+ # NYC StayWise - Airbnb Price Predictor (Fixed: Probabilities Sum to 1)
3
+ # 100% Self-Contained • Synthetic NYC Data • Zero Errors
4
 
5
  import streamlit as st
6
  import pandas as pd
 
61
  </style>
62
  """, unsafe_allow_html=True)
63
 
64
+ # ------------------ Generate Synthetic Airbnb Data (Fixed Probabilities) ------------------
65
  @st.cache_data
66
  def generate_airbnb_data(n_samples=10000):
67
  np.random.seed(42) # Reproducible
68
 
69
+ # Helper to normalize probs to sum exactly 1.0
70
+ def safe_choice(choices, probs):
71
+ if probs is not None:
72
+ probs = np.array(probs)
73
+ probs = probs / probs.sum() # Normalize to exactly 1.0
74
+ return np.random.choice(choices, n_samples, p=probs)
75
 
76
+ # Realistic NYC neighborhoods
77
+ neighborhoods = safe_choice(['Manhattan', 'Brooklyn', 'Queens', 'Bronx', 'Staten Island'], [0.4, 0.35, 0.15, 0.08, 0.02])
 
78
 
79
+ # Room types
80
+ room_types = safe_choice(['Entire home/apt', 'Private room', 'Shared room'], [0.6, 0.35, 0.05])
 
81
 
82
+ # Property types
83
+ property_types = safe_choice(['Apartment', 'House', 'Condominium', 'Loft', 'Townhouse', 'Other'], [0.7, 0.1, 0.1, 0.05, 0.03, 0.02])
 
 
 
84
 
85
+ # Accommodates (FIXED: Normalized probs)
86
+ accommodates = safe_choice([1, 2, 3, 4, 5, 6, 8, 10, 16], [0.1, 0.2, 0.2, 0.25, 0.15, 0.08, 0.02, 0.01, 0.01])
87
+
88
+ # Other numerics
89
+ bathrooms = np.clip(np.random.uniform(0.5, 4.0, n_samples), 0.5, 4.0).round(1)
90
+ bedrooms = safe_choice([0, 1, 2, 3, 4, 5, 6], [0.1, 0.4, 0.3, 0.15, 0.04, 0.008, 0.002])
91
+ beds = safe_choice([1, 2, 3, 4, 5, 6, 8, 10], [0.3, 0.3, 0.2, 0.1, 0.05, 0.03, 0.01, 0.01])
92
+
93
+ # Price generation: Realistic logic
94
  base_price = np.where(neighborhoods == 'Manhattan', 150, 80)
95
  base_price += np.where(room_types == 'Entire home/apt', 100, np.where(room_types == 'Private room', 50, 20))
96
  base_price += accommodates * 15 + bedrooms * 40 + beds * 20 + bathrooms * 30
97
+ base_price += np.random.normal(0, 50, n_samples) # Noise
98
  price = np.clip(base_price, 20, 1000).round(0).astype(int)
99
 
100
  df = pd.DataFrame({
 
105
  'beds': beds,
106
  'neighbourhood_group_cleansed': neighborhoods,
107
  'property_type': property_types,
108
+ 'cleaning_fee': safe_choice([True, False], [0.7, 0.3]),
109
+ 'instant_bookable': safe_choice([True, False], [0.6, 0.4]),
110
  'price': price
111
  })
112