# Nuhin23's picture
# Update app.py
# e678329 verified
# app.py
import pandas as pd
import numpy as np
from datasets import load_dataset
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import gradio as gr
# ------------------------------
# 1️⃣ Load dataset (smaller subset)
# ------------------------------
dataset = load_dataset("divarofficial/real_estate_ads")
df = dataset["train"].to_pandas()
# Cap the sample size at the number of available rows: DataFrame.sample
# raises ValueError when n exceeds the frame length (without replacement).
df = df.sample(n=min(50000, len(df)), random_state=42)  # take at most 50k rows

# ------------------------------
# 2️⃣ Keep only essential columns
# ------------------------------
columns_to_use = ['city_slug', 'rooms_count', 'building_size', 'property_type', 'price_value']
# Every column below is indexed unconditionally later on, so fail early with
# an explicit message instead of silently dropping a missing column here and
# crashing further down.
missing_columns = [col for col in columns_to_use if col not in df.columns]
if missing_columns:
    raise KeyError(f"Dataset is missing required columns: {missing_columns}")
# Work on an independent copy so the column assignments below never write
# into a view of the sampled frame.
df = df[columns_to_use].copy()

# ------------------------------
# 3️⃣ Convert numeric columns safely
# ------------------------------
for col in ['rooms_count', 'building_size', 'price_value']:
    # Values that cannot be parsed become NaN and are dropped just below.
    df[col] = pd.to_numeric(df[col], errors='coerce')
df = df.dropna(subset=['rooms_count', 'building_size', 'price_value'])
if df.empty:
    raise ValueError("No usable rows remain after cleaning the numeric columns.")

# ------------------------------
# 4️⃣ Encode categorical features
# ------------------------------
# Missing categories are mapped to an explicit "unknown" label so that the
# encoders only ever see uniform strings and the UI dropdowns (built from
# `classes_`) contain no null entries.
for col in ['city_slug', 'property_type']:
    df[col] = df[col].fillna("unknown").astype(str)

le_city = LabelEncoder()
df['city_slug'] = le_city.fit_transform(df['city_slug'])
le_type = LabelEncoder()
df['property_type'] = le_type.fit_transform(df['property_type'])

# ------------------------------
# 5️⃣ Features and target
# ------------------------------
X = df[['city_slug', 'rooms_count', 'building_size', 'property_type']]
y = df['price_value']

# ------------------------------
# 6️⃣ Train model
# ------------------------------
# NOTE: X_test / y_test are held out but not evaluated anywhere in this file.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
# ------------------------------
# 7️⃣ Prediction function
# ------------------------------
def predict_price(city, rooms, area, prop_type):
    """Return a formatted price estimate for one property.

    Args:
        city: City label; must be one of the classes known to ``le_city``.
        rooms: Number of rooms (anything ``float()`` accepts).
        area: Building size (anything ``float()`` accepts).
        prop_type: Property type label; must be known to ``le_type``.

    Returns:
        ``"Estimated Price: <value> BDT"`` on success, otherwise a string
        starting with ``"Error: "`` describing what went wrong. The function
        never raises, so the UI always receives displayable text.
    """
    # Empty numeric fields arrive as None; report that plainly instead of
    # surfacing the low-level float() TypeError text.
    if rooms is None or area is None:
        return "Error: please enter both the number of rooms and the area."
    try:
        city_enc = le_city.transform([city])[0]
        type_enc = le_type.transform([prop_type])[0]
        row = [[city_enc, float(rooms), float(area), type_enc]]
        # The model is fitted on a DataFrame, so pass the same column names
        # at predict time; a bare ndarray triggers scikit-learn's
        # "X does not have valid feature names" warning on every call.
        feature_names = getattr(model, "feature_names_in_", None)
        if feature_names is None:
            X_new = np.array(row)
        else:
            X_new = pd.DataFrame(row, columns=list(feature_names))
        price = model.predict(X_new)[0]
        # NOTE(review): the "BDT" unit is hard-coded; confirm it matches the
        # currency of `price_value` in the training data.
        return f"Estimated Price: {price:,.0f} BDT"
    except Exception as e:
        # UI boundary: turn any failure (e.g. an unseen label) into a message.
        return f"Error: {e}"
# ------------------------------
# 8️⃣ Launch Gradio App
# ------------------------------
# Dropdown choices are exactly the labels the encoders were fitted on, so any
# value the user can pick is guaranteed to be transformable in predict_price.
locations = list(le_city.classes_)
types = list(le_type.classes_)

# NOTE(review): the training data is loaded from "divarofficial/real_estate_ads"
# while the title below says "Bangladesh" and the area label says "sqft" —
# confirm that these labels match the dataset.
city_input = gr.Dropdown(locations, label="City")
rooms_input = gr.Number(label="Rooms")
area_input = gr.Number(label="Area (sqft)")
type_input = gr.Dropdown(types, label="Property Type")

# Input order must match the positional parameters of predict_price.
demo = gr.Interface(
    fn=predict_price,
    inputs=[city_input, rooms_input, area_input, type_input],
    outputs="text",
    title="🏠 Simple Bangladesh House Price Predictor",
    description="Predict house prices based on city, rooms, area, and property type.",
)
demo.launch()