Spaces:
Runtime error
Runtime error
| # app.py | |
| import pandas as pd | |
| import numpy as np | |
| from datasets import load_dataset | |
| from sklearn.preprocessing import LabelEncoder | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.ensemble import RandomForestRegressor | |
| import gradio as gr | |
# ------------------------------
# 1. Load dataset (only the columns we need, smaller subset)
# ------------------------------
columns_to_use = ['city_slug', 'rooms_count', 'building_size', 'property_type', 'price_value']
numeric_columns = ['rooms_count', 'building_size', 'price_value']
categorical_columns = ['city_slug', 'property_type']
max_rows = 50000  # cap on the number of sampled rows, to keep training fast

dataset = load_dataset("divarofficial/real_estate_ads")
train_split = dataset["train"]

# Fail fast with a clear message instead of a KeyError further down the
# script: every column listed above is required by the steps that follow.
missing_columns = [col for col in columns_to_use if col not in train_split.column_names]
if missing_columns:
    raise KeyError(f"Dataset is missing required columns: {missing_columns}")

# ------------------------------
# 2. Keep only essential columns
# ------------------------------
# Select the columns *before* converting to pandas so the unused columns of
# the full dataset are never materialised in memory.
df = train_split.select_columns(columns_to_use).to_pandas()
# min() guards against ValueError when fewer than max_rows rows are available.
df = df.sample(n=min(max_rows, len(df)), random_state=42)

# ------------------------------
# 3. Convert numeric columns safely
# ------------------------------
for col in numeric_columns:
    # Values that cannot be parsed as numbers become NaN and are dropped below.
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Drop rows with a missing value in ANY used column. Missing categorical
# values would otherwise break LabelEncoder (mixed str/float) or leak a
# "nan" option into the UI dropdowns.
df = df.dropna(subset=columns_to_use).reset_index(drop=True)

# NOTE(review): if a "numeric" column actually stores text labels, the
# coercion above turns every row into NaN -- confirm the raw dtypes.
if len(df) < 2:
    raise ValueError(
        "Not enough usable rows left after cleaning "
        f"({len(df)} row(s)); check that {numeric_columns} contain numeric values."
    )

# ------------------------------
# 4. Encode categorical features
# ------------------------------
# Cast to str first so each encoder sees a uniformly-typed column and the
# classes it exposes are plain strings.
le_city = LabelEncoder()
df['city_slug'] = le_city.fit_transform(df['city_slug'].astype(str))
le_type = LabelEncoder()
df['property_type'] = le_type.fit_transform(df['property_type'].astype(str))

# ------------------------------
# 5. Features and target
# ------------------------------
X = df[['city_slug', 'rooms_count', 'building_size', 'property_type']]
y = df['price_value']

# ------------------------------
# 6. Train model
# ------------------------------
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = RandomForestRegressor(n_estimators=100, random_state=42)  # fewer trees for speed
model.fit(X_train, y_train)
# Report quality on the held-out split so the split is actually used.
print(f"Held-out R^2: {model.score(X_test, y_test):.3f}")
# ------------------------------
# 7. Prediction function
# ------------------------------
# Column order must match the feature frame the model was fitted on.
FEATURE_COLUMNS = ['city_slug', 'rooms_count', 'building_size', 'property_type']


def predict_price(city, rooms, area, prop_type):
    """Return a formatted price estimate for one listing.

    Args:
        city: City label, as offered by the city dropdown.
        rooms: Number of rooms (must be a finite number >= 0).
        area: Building size (must be a finite number > 0).
        prop_type: Property-type label, as offered by the type dropdown.

    Returns:
        ``"Estimated Price: <n> BDT"`` on success, otherwise a string
        starting with ``"Error: "`` describing the problem. The function
        never raises, so the message can be shown directly in the UI.
    """
    # UI widgets hand over None for fields the user left empty.
    if city is None or prop_type is None:
        return "Error: please select both a city and a property type."
    if rooms is None or area is None:
        return "Error: please enter both the number of rooms and the area."

    try:
        rooms_val = float(rooms)
        area_val = float(area)
    except (TypeError, ValueError):
        return "Error: rooms and area must be numbers."

    if not (np.isfinite(rooms_val) and np.isfinite(area_val)):
        return "Error: rooms and area must be finite numbers."
    if rooms_val < 0:
        return "Error: rooms cannot be negative."
    if area_val <= 0:
        return "Error: area must be greater than zero."

    try:
        # transform() raises ValueError for labels unseen during fitting.
        city_enc = le_city.transform([city])[0]
        type_enc = le_type.transform([prop_type])[0]
        # A DataFrame with the training column names keeps the feature
        # order explicit and avoids scikit-learn's feature-name warning.
        X_new = pd.DataFrame(
            [[city_enc, rooms_val, area_val, type_enc]],
            columns=FEATURE_COLUMNS,
        )
        price = model.predict(X_new)[0]
    except Exception as e:
        # UI boundary: report the failure instead of crashing the request.
        return f"Error: {str(e)}"

    # NOTE(review): the "BDT" unit is hard-coded -- confirm it matches the
    # currency of the training data's price column.
    return f"Estimated Price: {price:,.0f} BDT"
# ------------------------------
# 8. Launch Gradio App
# ------------------------------
# .tolist() converts the encoder classes to native Python values so the
# dropdown choices serialise cleanly.
locations = le_city.classes_.tolist()
types = le_type.classes_.tolist()

# Bound to a name so the interface can be referenced after construction.
demo = gr.Interface(
    fn=predict_price,
    inputs=[
        gr.Dropdown(locations, label="City"),
        gr.Number(label="Rooms"),
        # NOTE(review): confirm the unit of the training data's size column
        # really is square feet.
        gr.Number(label="Area (sqft)"),
        gr.Dropdown(types, label="Property Type"),
    ],
    outputs="text",
    title="🏠 Simple Bangladesh House Price Predictor",
    description="Predict house prices based on city, rooms, area, and property type.",
)
demo.launch()