| import pandas as pd
|
| import joblib
|
| import numpy as np
|
|
|
| from sklearn.model_selection import train_test_split
|
| from sklearn.preprocessing import LabelEncoder, StandardScaler
|
| from sklearn.metrics import r2_score
|
|
|
| from sklearn.svm import SVR
|
| from xgboost import XGBRegressor
|
|
|
|
|
|
|
|
|
|
|
# Load the raw listings. The path is relative, so it is resolved against the
# directory the script is launched from.
df = pd.read_csv("../dataset/Real_Estate_Data.csv")

# Normalise the header names in two steps: trim surrounding whitespace, then
# replace inner spaces with underscores (a header such as "Total Area" would
# become "Total_Area", which is the spelling the rest of the script uses).
df.columns = df.columns.str.strip()
df.columns = df.columns.str.replace(" ", "_")

print("Columns:", df.columns)
print("Original dataset:", df.shape)
|
|
|
|
|
|
|
|
|
def convert_price(price):
    """Convert a price label such as "₹1.5 Cr" or "75 Lac" to rupees.

    The rupee sign and thousands separators are removed first. A unit marker
    then selects the multiplier: "Cr" (crore) is 10,000,000 and "Lac" / "L"
    (lakh) is 100,000. Text without a recognised unit is parsed as a plain
    number.

    Args:
        price: Raw cell value. It is passed through ``str()`` first, so
            numeric values are accepted as well as text.

    Returns:
        The price as a float, or None when the remaining text is not a
        valid number.
    """
    text = str(price).replace("₹", "").replace(",", "").strip()

    # Order matters: "Lac" has to be tried before the bare "L", otherwise
    # "75 Lac" would be reduced to "75 ac" and fail to parse.
    units = (("Cr", 10000000), ("Lac", 100000), ("L", 100000))
    multiplier = 1
    for unit, factor in units:
        if unit in text:
            text = text.replace(unit, "").strip()
            multiplier = factor
            break

    try:
        return float(text) * multiplier
    except ValueError:
        # Only a failed numeric parse means "no usable price". A bare
        # ``except`` would also swallow KeyboardInterrupt and SystemExit.
        return None
|
|
|
# Turn the textual price labels into rupee amounts; convert_price returns
# None for labels it cannot parse.
df["Price"] = df["Price"].apply(convert_price)

# Remove the unit text from the area column and parse what is left. Values
# that still are not numeric become NaN because of errors="coerce".
df["Total_Area"] = pd.to_numeric(
    df["Total_Area"]
    .astype(str)
    .str.replace("sqft", "", regex=False)
    .str.replace("sq.ft", "", regex=False),
    errors="coerce",
)
|
|
|
|
|
|
|
|
|
# Force the room-count columns to numeric dtype; entries that cannot be
# parsed as numbers become NaN.
for _count_column in ("Baths", "Balcony"):
    df[_count_column] = pd.to_numeric(df[_count_column], errors="coerce")
|
|
|
|
|
|
|
|
|
# Impute the numeric features: the column median for area, one bathroom and
# zero balconies as defaults.
# NOTE(review): the median is computed over the whole dataset, i.e. before
# the train/test split further down.
df = df.fillna(
    {
        "Total_Area": df["Total_Area"].median(),
        "Baths": 1,
        "Balcony": 0,
    }
)

# Rows without a price (the target) or a location are dropped, not imputed.
df = df.dropna(subset=["Price", "Location"])
|
|
|
|
|
|
|
|
|
# NOTE(review): BHK is populated with the bathroom count, so the BHK and
# Baths features are identical - confirm whether the dataset offers a real
# bedroom column.
df = df.assign(BHK=df["Baths"])

# Keep only the model inputs plus the target, in a fixed column order.
df = df[["Total_Area", "BHK", "Baths", "Balcony", "Location", "Price"]]

print("Dataset after cleaning:", df.shape)
|
|
|
|
|
|
|
|
|
# Replace every location name with an integer code. The fitted encoder is
# kept in `encoder` because it is written to disk at the end of the script.
# NOTE(review): the encoder is fitted on all rows, before the train/test
# split.
encoder = LabelEncoder()
encoder.fit(df["Location"])
df["Location"] = encoder.transform(df["Location"])
|
|
|
|
|
|
|
|
|
# Features are every column except the target; the target is the price.
X = df.drop(columns="Price")
y = df["Price"]

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
|
|
|
|
|
|
|
|
# Standardise the features. The scaler is fitted on the training rows only
# and then applied to both splits.
scaler_X = StandardScaler().fit(X_train)
X_train_scaled = scaler_X.transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

# Standardise the target the same way. StandardScaler expects a 2-D array,
# so the prices are reshaped to a single column and flattened afterwards.
scaler_y = StandardScaler().fit(y_train.values.reshape(-1, 1))
y_train_scaled = scaler_y.transform(y_train.values.reshape(-1, 1)).ravel()
y_test_scaled = scaler_y.transform(y_test.values.reshape(-1, 1)).ravel()
|
|
|
|
|
|
|
|
|
# Support-vector regressor trained on the standardised features and target.
svm = SVR(kernel="rbf", gamma="scale", C=100, epsilon=0.1)
svm.fit(X_train_scaled, y_train_scaled)

# Predictions come out in standardised units; map them back to rupees before
# scoring against the raw test prices.
svm_pred_scaled = svm.predict(X_test_scaled)
svm_pred = scaler_y.inverse_transform(svm_pred_scaled[:, np.newaxis])

print("SVM R2:", r2_score(y_test, svm_pred))
|
|
|
|
|
|
|
|
|
# Gradient-boosted trees, trained on the unscaled features and raw prices.
xgb = XGBRegressor(
    n_estimators=300,
    learning_rate=0.05,
    max_depth=6,
    random_state=42,
)
xgb.fit(X_train, y_train)

xgb_pred = xgb.predict(X_test)
print("XGBoost R2:", r2_score(y_test, xgb_pred))
|
|
|
|
|
|
|
|
|
# Persist both models together with the fitted preprocessing objects.
# NOTE(review): this presumably requires ../model to exist already - confirm,
# since a failure here would happen only after training has finished.
for _artifact, _target in (
    (svm, "../model/svm_model.pkl"),
    (xgb, "../model/xgb_model.pkl"),
    (encoder, "../model/location_encoder.pkl"),
    (scaler_X, "../model/scaler_X.pkl"),
    (scaler_y, "../model/scaler_y.pkl"),
):
    joblib.dump(_artifact, _target)

print("Models saved successfully")