Spaces:

Haticece
/

HomePricePredictor

Sleeping

App Files Files Community

Haticece committed on Jan 1, 2025

Commit

45fa5d4

verified ·

1 Parent(s): 7c7542d

Update app.py

Browse files

Files changed (1) hide show

app.py +171 -228

app.py CHANGED Viewed

@@ -1,235 +1,178 @@
-import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
 import seaborn as sns
-import warnings
-warnings.filterwarnings("ignore")
-from sklearn.preprocessing import LabelEncoder
-from sklearn.preprocessing import StandardScaler,MinMaxScaler
 from sklearn.model_selection import train_test_split
 from sklearn.linear_model import LinearRegression
 from sklearn.tree import DecisionTreeRegressor
-from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor,AdaBoostRegressor
-from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score,accuracy_score
-df = pd.read_csv('Housing.csv')
-df.sample(15)
-df.info()
-df.describe()
-df.isnull().sum()
-df.duplicated().sum()
-bedrooms_count = df['bedrooms'].value_counts()
-bedrooms_count
-plt.figure(figsize=(8,3))
-ax=sns.barplot(x=bedrooms_count.index, y=bedrooms_count.values,palette="rocket_r")
-ax.bar_label(ax.containers[0], fontsize=8);
-plt.title('count of bedrooms')
-plt.xlabel('bedrooms')
-plt.ylabel('count')
-plt.show()
-count_bathrooms = df['bathrooms'].value_counts()
-count_bathrooms
-ax = sns.barplot(x=count_bathrooms.index,y=count_bathrooms.values,palette="mako")
-ax.bar_label(ax.containers[0], fontsize=8);
-plt.title('count of bathrooms')
-plt.xlabel('bathrooms')
-plt.ylabel('count')
-plt.show()
-stories_count = df['stories'].value_counts()
-stories_count
-ax = sns.barplot(x=stories_count.index,y=stories_count.values,palette="magma")
-ax.bar_label(ax.containers[0], fontsize=8)
-plt.title('count of stories')
-plt.xlabel('stories')
-plt.ylabel('count')
-plt.show()
-count_mainroad=df['mainroad'].value_counts()
-count_mainroad
-explode = [0, 0.09]
-colors = sns.color_palette("crest")
-plt.pie(count_mainroad.values,
-        labels=count_mainroad.index,
-        autopct='%.0f%%',explode=explode,
-        colors = colors)
-plt.title("count of mainroad")
-plt.legend(loc = "best")
-plt.show()
-guestroom_count = df['guestroom'].value_counts()
-guestroom_count
-explode = [0, 0.09]
-colors = sns.color_palette("crest")
-plt.pie(guestroom_count.values,
-        labels=guestroom_count.index,
-        autopct='%.0f%%',explode=explode,
-        colors = colors)
-plt.title("count of guestroom")
-plt.legend(loc = "best")
-plt.show()
-furnishingstatus_count = df.furnishingstatus.value_counts()
-furnishingstatus_count
-ax = sns.barplot(x=furnishingstatus_count.index,
-                 y=furnishingstatus_count.values,
-                 palette="magma"
-                )
-ax.bar_label(ax.containers[0], fontsize=8)
-plt.show()
-prefarea_count = df.prefarea.value_counts()
-prefarea_count
-explode = [0, 0.09]
-colors = sns.color_palette("magma")
-plt.pie(prefarea_count.values,
-        labels=prefarea_count.index,
-        autopct='%.0f%%',explode=explode,
-        colors = colors)
-plt.title("count of guestroom")
-plt.legend(loc = "best")
-plt.show()
-ax = sns.countplot(df, x="bedrooms", hue="parking",palette="magma")
-for i in range(len(df['parking'].unique())):
-    ax.bar_label(ax.containers[i], fontsize=8)
-plt.show()
-ax = sns.countplot(df, x="bedrooms", hue="bathrooms",palette="mako")
-for i in range(len(df['bathrooms'].unique())):
-    ax.bar_label(ax.containers[i], fontsize=8)
-plt.show()
-ax = sns.countplot(df, x="bedrooms", hue="stories",palette="mako")
-for i in range(len(df['stories'].unique())):
-    ax.bar_label(ax.containers[i], fontsize=8)
-plt.ylabel('count of stoies')
-plt.show()
-ax = sns.countplot(df, x="bedrooms", hue="furnishingstatus",palette="viridis")
-for i in range(len(df['furnishingstatus'].unique())):
-    ax.bar_label(ax.containers[i], fontsize=8)
-plt.ylabel('count of furnishingstatus')
-plt.show()
-ax = sns.countplot(df, x="parking", hue="furnishingstatus",palette="rocket_r")
-for i in range(len(df['furnishingstatus'].unique())):
-    ax.bar_label(ax.containers[i], fontsize=8)
-plt.ylabel('count of furnishingstatus')
-plt.show()
-ax = sns.countplot(df, x="stories", hue="furnishingstatus",palette="cubehelix")
-for i in range(len(df['furnishingstatus'].unique())):
-    ax.bar_label(ax.containers[i], fontsize=8)
-plt.ylabel('count of furnishingstatus')
-plt.show()
-ax = sns.countplot(df, x="bathrooms", hue="furnishingstatus",palette="rocket")
-for i in range(len(df['furnishingstatus'].unique())):
-    ax.bar_label(ax.containers[i], fontsize=8)
-plt.ylabel('count of furnishingstatus')
-plt.show()
-ax = sns.countplot(df, x="bathrooms", hue="prefarea",palette="crest")
-for i in range(len(df['prefarea'].unique())):
-    ax.bar_label(ax.containers[i], fontsize=8)
-plt.ylabel('count of prefarea')
-plt.show()
-ax = sns.countplot(df, x="bedrooms", hue="prefarea",palette="cubehelix")
-for i in range(len(df['prefarea'].unique())):
-    ax.bar_label(ax.containers[i], fontsize=8)
-plt.ylabel('count of prefarea')
-plt.show()
-ax = sns.countplot(df, x="stories", hue="prefarea",palette="rocket")
-for i in range(len(df['prefarea'].unique())):
-    ax.bar_label(ax.containers[i], fontsize=8)
-plt.ylabel('count of prefarea')
-plt.show()
-ax = sns.countplot(df, x="parking", hue="prefarea",palette="flare")
-for i in range(len(df['prefarea'].unique())):
-    ax.bar_label(ax.containers[i], fontsize=8)
-plt.ylabel('count of prefarea')
-plt.show()
-ax = sns.countplot(df, x="furnishingstatus", hue="prefarea",palette="rocket")
-for i in range(len(df['prefarea'].unique())):
-    ax.bar_label(ax.containers[i], fontsize=8)
-plt.ylabel('count of prefarea')
-plt.legend(loc = 'best')
-plt.show()
-ax = sns.countplot(df, x="bathrooms", hue="hotwaterheating",palette="rocket")
-for i in range(len(df['hotwaterheating'].unique())):
-    ax.bar_label(ax.containers[i], fontsize=8)
-plt.ylabel('count of hotwaterheating')
-plt.legend(loc = 'best')
-plt.show()
-ax = sns.countplot(df, x="parking", hue="hotwaterheating",palette="rocket")
-for i in range(len(df['hotwaterheating'].unique())):
-    ax.bar_label(ax.containers[i], fontsize=8)
-plt.ylabel('count of hotwaterheating')
-plt.legend(loc = 'best')
-plt.show()
-ax = sns.countplot(df, x="bedrooms", hue="hotwaterheating",palette="rocket")
-for i in range(len(df['hotwaterheating'].unique())):
-    ax.bar_label(ax.containers[i], fontsize=8)
-plt.ylabel('count of hotwaterheating')
-plt.legend(loc = 'best')
-plt.show()
-ax = sns.countplot(df, x="stories", hue="hotwaterheating",palette="rocket")
-for i in range(len(df['hotwaterheating'].unique())):
-    ax.bar_label(ax.containers[i], fontsize=8)
-plt.ylabel('count of hotwaterheating')
-plt.legend(loc = 'best')
-plt.show()
-ax = sns.countplot(df, x="mainroad", hue="hotwaterheating",palette="rocket")
-for i in range(len(df['hotwaterheating'].unique())):
-    ax.bar_label(ax.containers[i], fontsize=8)
-plt.ylabel('count of hotwaterheating')
-plt.legend(loc = 'best')
-plt.show()
-encoder = LabelEncoder()
-encoding_col = ['furnishingstatus','prefarea','airconditioning','hotwaterheating','basement','guestroom','mainroad']
-for col in encoding_col:
-    df[col]=encoder.fit_transform(df[col])
-df
-plt.figure(figsize=(10, 10))
-sns.heatmap(df.corr(), annot=True, fmt=".2f", linewidths=0.5, cbar=True)
-plt.show()
-x=df.drop(columns=['price'],axis = 1)
-y=df['price']
-scaler = MinMaxScaler()
-x = scaler.fit_transform(x)
-y = scaler.fit_transform(y.values.reshape(-1, 1))
-x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=50)
-ln_model = LinearRegression()
-ln_model.fit(x_train, y_train)
-y_pred = ln_model.predict(x_test)
-ln_acc = r2_score(y_test, y_pred)
-ln_acc
-y_pred = ln_model.predict(x_test)
-ln_acc = r2_score(y_test, y_pred)
-ln_acc
-dt_model = DecisionTreeRegressor()
-dt_model.fit(x_train, y_train)
-y_pred = dt_model.predict(x_test)
-dt_acc = r2_score(y_test, y_pred)
-dt_acc
-rf_model = RandomForestRegressor(n_estimators=100)
-rf_model.fit(x_train, y_train)
-y_pred = rf_model.predict(x_test)
-rf_acc = r2_score(y_test, y_pred)
-rf_acc
 from sklearn.svm import SVR
-svr_model = SVR(kernel='linear')
-svr_model.fit(x_train, y_train)
-y_pred = svr_model.predict(x_test)
-svr_acc = r2_score(y_test, y_pred)
-svr_acc
-from sklearn.ensemble import GradientBoostingRegressor
-gb_model = GradientBoostingRegressor()
-gb_model.fit(x_train, y_train)
-y_pred = gb_model.predict(x_test)
-gb_acc = r2_score(y_test, y_pred)
-gb_acc
-from sklearn.ensemble import AdaBoostRegressor
-ada_model = AdaBoostRegressor()
-ada_model.fit(x_train, y_train)
-y_pred = ada_model.predict(x_test)
-ada_acc = r2_score(y_test, y_pred)
-ada_acc

+import streamlit as st
 import pandas as pd
+import numpy as np
 import matplotlib.pyplot as plt
 import seaborn as sns
+from sklearn.preprocessing import LabelEncoder, MinMaxScaler
 from sklearn.model_selection import train_test_split
 from sklearn.linear_model import LinearRegression
 from sklearn.tree import DecisionTreeRegressor
+from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
 from sklearn.svm import SVR
+from sklearn.metrics import r2_score
+# Uyarıları gizle
+import warnings
+warnings.filterwarnings("ignore")
+# Veri Yükleme ve Ön İşleme
+@st.cache_data
+def load_data():
+    """Read ``Housing.csv`` and return it with the categorical columns encoded.
+
+    A ``date`` column is dropped when the file has one. Every column named
+    in ``categorical_columns`` is replaced by the integer codes returned by
+    ``LabelEncoder.fit_transform``. The function is wrapped in
+    ``st.cache_data``.
+    """
+    categorical_columns = (
+        'furnishingstatus',
+        'prefarea',
+        'airconditioning',
+        'hotwaterheating',
+        'basement',
+        'guestroom',
+        'mainroad',
+    )
+    frame = pd.read_csv('Housing.csv')
+    # Drop the unneeded column (if present).
+    frame = frame.drop(columns=['date'], errors='ignore')
+    # Encoding: the encoder is refitted for every column, so a fresh
+    # instance per column gives the same codes as one reused instance.
+    for name in categorical_columns:
+        frame[name] = LabelEncoder().fit_transform(frame[name])
+    return frame
+df = load_data()
+# Model training helper
+def train_and_evaluate_model(model, X_train, X_test, y_train, y_test):
+    """Fit ``model`` on the training split and score it on the test split.
+
+    ``model`` is fitted in place with ``model.fit(X_train, y_train)``; its
+    predictions for ``X_test`` are then compared with ``y_test``.
+
+    Returns:
+        The value of ``r2_score`` for the test-split predictions.
+    """
+    model.fit(X_train, y_train)
+    predictions = model.predict(X_test)
+    return r2_score(y_true=y_test, y_pred=predictions)
+# Streamlit interface
+st.title("Ev Fiyat Tahmini Uygulaması")
+# Sidebar - model selection
+model_choices = [
+    "Linear Regression",
+    "Decision Tree",
+    "Random Forest",
+    "SVR",
+    "Gradient Boosting",
+    "AdaBoost",
+]
+st.sidebar.header("Model Seçimi")
+selected_model = st.sidebar.selectbox("Model Seçin", model_choices)
+# Sidebar - dataset statistics (shown in the main area when the box is ticked)
+st.sidebar.header("Veri Seti İstatistikleri")
+show_statistics = st.sidebar.checkbox("İstatistikleri Göster")
+if show_statistics:
+    st.subheader("Veri Seti İstatistikleri")
+    st.write(df.describe())
+# Sidebar - charts
+def _render_figure(fig):
+    """Show ``fig`` in the app, then close it.
+
+    ``plt.subplots`` registers every figure with pyplot; without the
+    explicit close they pile up on each script rerun.
+    """
+    st.pyplot(fig)
+    plt.close(fig)
+def _show_count_bars(column, heading, palette, figsize=None):
+    """Draw a labelled bar chart of the value counts of ``df[column]``.
+
+    ``heading`` is the subheader text, ``palette`` the seaborn palette name
+    and ``figsize`` an optional figure size (matplotlib default when None).
+    """
+    st.subheader(heading)
+    counts = df[column].value_counts()
+    fig, ax = plt.subplots(figsize=figsize)
+    sns.barplot(x=counts.index, y=counts.values, palette=palette, ax=ax)
+    for bar in ax.patches:
+        height = bar.get_height()
+        # Bar heights are floats; print them as whole counts ("300", not "300.0").
+        ax.annotate(f'{height:.0f}', (bar.get_x() + bar.get_width() / 2., height),
+                    ha='center', va='center', fontsize=8, color='black', xytext=(0, 5),
+                    textcoords='offset points')
+    _render_figure(fig)
+def _show_share_pie(column, heading, title, palette):
+    """Draw a pie chart of the value counts of ``df[column]``.
+
+    ``heading`` is the subheader text, ``title`` the chart title and
+    ``palette`` the seaborn palette name used for the slice colours.
+    """
+    st.subheader(heading)
+    counts = df[column].value_counts()
+    fig, ax = plt.subplots()
+    # One offset per slice: first slice in place, the others pulled out.
+    # For two categories this is the original [0, 0.09]; a fixed two-item
+    # list would raise for any other number of categories.
+    explode = [0 if position == 0 else 0.09 for position in range(len(counts))]
+    _, _, autotexts = ax.pie(counts.values, labels=counts.index, autopct='%.0f%%',
+                             explode=explode, colors=sns.color_palette(palette))
+    for autotext in autotexts:
+        autotext.set_color('black')
+    # Use the Axes object directly instead of pyplot's global "current axes".
+    ax.set_title(title)
+    ax.legend(loc="best")
+    _render_figure(fig)
+st.sidebar.header("Grafikler")
+if st.sidebar.checkbox("Grafikleri Göster"):
+    st.subheader("Grafikler")
+    # Count of bedrooms
+    _show_count_bars('bedrooms', "Oda Sayısı Dağılımı", "rocket_r", figsize=(8, 3))
+    # Count of bathrooms
+    _show_count_bars('bathrooms', "Banyo Sayısı Dağılımı", "mako")
+    # Count of stories
+    _show_count_bars('stories', "Kat Sayısı Dağılımı", "magma")
+    # Share of houses on the main road
+    _show_share_pie('mainroad', "Ana Yola Bağlantı Dağılımı", "Ana Yola Bağlantı", "crest")
+    # Share of houses with a guest room
+    _show_share_pie('guestroom', "Misafir Odası Dağılımı", "Misafir Odası", "crest")
+    # Count per furnishing status
+    _show_count_bars('furnishingstatus', "Eşya Durumu Dağılımı", "magma")
+    # Share of houses in a preferred area
+    _show_share_pie('prefarea', "Tercih Edilen Bölge Dağılımı", "Tercih Edilen Bölge", "magma")
+    # Correlation heatmap
+    st.subheader("Korelasyon Matrisi")
+    fig, ax = plt.subplots(figsize=(10, 10))
+    sns.heatmap(df.corr(), annot=True, fmt=".2f", linewidths=0.5, cbar=True, ax=ax)
+    _render_figure(fig)
+# Main section - model results and prediction
+st.header("Model Sonuçları")
+# Seed shared by the split and the stochastic models, so the displayed
+# score does not change when the script reruns for an unrelated widget.
+RANDOM_STATE = 50
+# Scaling and splitting
+X = df.drop(columns=['price'])
+y = df['price']
+# One scaler per array: refitting a single scaler on the target would
+# discard the scaling that was fitted on the features.
+feature_scaler = MinMaxScaler()
+target_scaler = MinMaxScaler()
+X = feature_scaler.fit_transform(X)
+# ravel() hands the estimators a 1-D target instead of an (n, 1) column.
+y = target_scaler.fit_transform(y.values.reshape(-1, 1)).ravel()
+# Backward-compatible alias: the single ``scaler`` used previously ended up
+# fitted on the price column.
+scaler = target_scaler
+# NOTE(review): both scalers are fitted on the full data set before the
+# split, so test rows influence the scaling - consider fitting on the
+# training split only.
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=RANDOM_STATE)
+# Build only the model chosen in the sidebar; the keys are the selectbox options.
+MODEL_FACTORIES = {
+    "Linear Regression": LinearRegression,
+    "Decision Tree": lambda: DecisionTreeRegressor(random_state=RANDOM_STATE),
+    "Random Forest": lambda: RandomForestRegressor(n_estimators=100, random_state=RANDOM_STATE),
+    "SVR": lambda: SVR(kernel='linear'),
+    "Gradient Boosting": lambda: GradientBoostingRegressor(random_state=RANDOM_STATE),
+    "AdaBoost": lambda: AdaBoostRegressor(random_state=RANDOM_STATE),
+}
+model = MODEL_FACTORIES[selected_model]()
+r2 = train_and_evaluate_model(model, X_train, X_test, y_train, y_test)
+st.write(f"{selected_model} R-kare Değeri: {r2:.3f}")