"""Streamlit app: train a linear regression on an uploaded CSV and predict a price.

The user uploads a CSV, picks a numeric target column and one or more numeric
feature columns; the app fits ``LinearRegression`` on an 80/20 split, reports
MAE / MSE / R² on the held-out part, and predicts a price for values entered
by the user.
"""

import streamlit as st
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Title
st.title("Real Estate Price Prediction")

# Instructions
st.write("## Instructions")
st.write("1. Upload a CSV file containing real estate data.")
st.write("2. Select the target column (Price) from the dataset.")
st.write("3. Choose feature columns that influence the price.")
st.write("4. The model will be trained on the provided data and evaluated.")
st.write("5. Enter values for the selected features to predict the price of a property.")

# Upload Dataset
uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])

if uploaded_file:
    df = pd.read_csv(uploaded_file)
    st.write("Dataset Preview:")
    st.write(df.head())

    # Select Features and Target
    target_column = st.selectbox("Select the target column (Price)", df.columns)
    feature_columns = st.multiselect(
        "Select feature columns",
        [col for col in df.columns if col != target_column],
    )

    if feature_columns and target_column:
        # The model and the mean-imputation below only work on numeric data;
        # report unusable selections instead of crashing with a traceback.
        non_numeric = [
            col
            for col in [*feature_columns, target_column]
            if not pd.api.types.is_numeric_dtype(df[col])
        ]

        if non_numeric:
            st.error(
                "These selected columns are not numeric and cannot be used: "
                + ", ".join(str(col) for col in non_numeric)
            )
        elif len(df) < 2:
            # train_test_split needs at least one training and one test row.
            st.error("The dataset needs at least 2 rows to train and evaluate the model.")
        else:
            X = df[feature_columns]
            y = df[target_column]

            # Handling missing values.
            # Assign the result instead of using inplace=True: X and y are
            # derived from df, and in-place edits on them trigger pandas'
            # chained-assignment warnings and may write back into df.
            X = X.fillna(X.mean())
            y = y.fillna(y.mean())

            # Train-test split
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.2, random_state=42
            )

            # Train model
            model = LinearRegression()
            model.fit(X_train, y_train)

            # Predict
            y_pred = model.predict(X_test)

            # Display Metrics
            st.write("### Model Performance")
            st.write(f"Mean Absolute Error: {mean_absolute_error(y_test, y_pred):.2f}")
            st.write(f"Mean Squared Error: {mean_squared_error(y_test, y_pred):.2f}")
            st.write(f"R² Score: {model.score(X_test, y_test):.2f}")

            # Prediction
            st.write("### Make a Prediction")
            # One numeric input per feature, pre-filled with that feature's mean.
            user_input = {
                col: st.number_input(f"{col}", value=float(X[col].mean()))
                for col in feature_columns
            }

            if st.button("Predict Price"):
                # Single-row frame whose columns follow feature_columns order,
                # matching the columns the model was fitted on.
                user_df = pd.DataFrame([user_input])
                prediction = model.predict(user_df)[0]
                st.success(f"Predicted Price: ${prediction:,.2f}")