Haticece committed on
Commit
45fa5d4
·
verified ·
1 Parent(s): 7c7542d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +171 -228
app.py CHANGED
@@ -1,235 +1,178 @@
1
- import numpy as np
2
  import pandas as pd
 
3
  import matplotlib.pyplot as plt
4
  import seaborn as sns
5
- import warnings
6
- warnings.filterwarnings("ignore")
7
-
8
- from sklearn.preprocessing import LabelEncoder
9
- from sklearn.preprocessing import StandardScaler,MinMaxScaler
10
  from sklearn.model_selection import train_test_split
11
-
12
  from sklearn.linear_model import LinearRegression
13
  from sklearn.tree import DecisionTreeRegressor
14
- from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor,AdaBoostRegressor
15
-
16
- from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score,accuracy_score
17
- df = pd.read_csv('Housing.csv')
18
- df.sample(15)
19
- df.info()
20
- df.describe()
21
- df.isnull().sum()
22
- df.duplicated().sum()
23
- bedrooms_count = df['bedrooms'].value_counts()
24
- bedrooms_count
25
- plt.figure(figsize=(8,3))
26
- ax=sns.barplot(x=bedrooms_count.index, y=bedrooms_count.values,palette="rocket_r")
27
- ax.bar_label(ax.containers[0], fontsize=8);
28
- plt.title('count of bedrooms')
29
- plt.xlabel('bedrooms')
30
- plt.ylabel('count')
31
- plt.show()
32
- count_bathrooms = df['bathrooms'].value_counts()
33
- count_bathrooms
34
- ax = sns.barplot(x=count_bathrooms.index,y=count_bathrooms.values,palette="mako")
35
- ax.bar_label(ax.containers[0], fontsize=8);
36
- plt.title('count of bathrooms')
37
- plt.xlabel('bathrooms')
38
- plt.ylabel('count')
39
- plt.show()
40
- stories_count = df['stories'].value_counts()
41
- stories_count
42
- ax = sns.barplot(x=stories_count.index,y=stories_count.values,palette="magma")
43
- ax.bar_label(ax.containers[0], fontsize=8)
44
- plt.title('count of stories')
45
- plt.xlabel('stories')
46
- plt.ylabel('count')
47
- plt.show()
48
- count_mainroad=df['mainroad'].value_counts()
49
- count_mainroad
50
- explode = [0, 0.09]
51
- colors = sns.color_palette("crest")
52
- plt.pie(count_mainroad.values,
53
- labels=count_mainroad.index,
54
- autopct='%.0f%%',explode=explode,
55
- colors = colors)
56
- plt.title("count of mainroad")
57
- plt.legend(loc = "best")
58
-
59
- plt.show()
60
- guestroom_count = df['guestroom'].value_counts()
61
- guestroom_count
62
- explode = [0, 0.09]
63
- colors = sns.color_palette("crest")
64
- plt.pie(guestroom_count.values,
65
- labels=guestroom_count.index,
66
- autopct='%.0f%%',explode=explode,
67
- colors = colors)
68
- plt.title("count of guestroom")
69
- plt.legend(loc = "best")
70
-
71
- plt.show()
72
- furnishingstatus_count = df.furnishingstatus.value_counts()
73
- furnishingstatus_count
74
- ax = sns.barplot(x=furnishingstatus_count.index,
75
- y=furnishingstatus_count.values,
76
- palette="magma"
77
-
78
- )
79
- ax.bar_label(ax.containers[0], fontsize=8)
80
- plt.show()
81
- prefarea_count = df.prefarea.value_counts()
82
- prefarea_count
83
- explode = [0, 0.09]
84
- colors = sns.color_palette("magma")
85
- plt.pie(prefarea_count.values,
86
- labels=prefarea_count.index,
87
- autopct='%.0f%%',explode=explode,
88
- colors = colors)
89
- plt.title("count of guestroom")
90
- plt.legend(loc = "best")
91
-
92
- plt.show()
93
- ax = sns.countplot(df, x="bedrooms", hue="parking",palette="magma")
94
- for i in range(len(df['parking'].unique())):
95
- ax.bar_label(ax.containers[i], fontsize=8)
96
-
97
-
98
- plt.show()
99
- ax = sns.countplot(df, x="bedrooms", hue="bathrooms",palette="mako")
100
- for i in range(len(df['bathrooms'].unique())):
101
- ax.bar_label(ax.containers[i], fontsize=8)
102
- plt.show()
103
- ax = sns.countplot(df, x="bedrooms", hue="stories",palette="mako")
104
- for i in range(len(df['stories'].unique())):
105
- ax.bar_label(ax.containers[i], fontsize=8)
106
- plt.ylabel('count of stoies')
107
- plt.show()
108
- ax = sns.countplot(df, x="bedrooms", hue="furnishingstatus",palette="viridis")
109
- for i in range(len(df['furnishingstatus'].unique())):
110
- ax.bar_label(ax.containers[i], fontsize=8)
111
- plt.ylabel('count of furnishingstatus')
112
- plt.show()
113
- ax = sns.countplot(df, x="parking", hue="furnishingstatus",palette="rocket_r")
114
- for i in range(len(df['furnishingstatus'].unique())):
115
- ax.bar_label(ax.containers[i], fontsize=8)
116
- plt.ylabel('count of furnishingstatus')
117
- plt.show()
118
- ax = sns.countplot(df, x="stories", hue="furnishingstatus",palette="cubehelix")
119
- for i in range(len(df['furnishingstatus'].unique())):
120
- ax.bar_label(ax.containers[i], fontsize=8)
121
- plt.ylabel('count of furnishingstatus')
122
- plt.show()
123
- ax = sns.countplot(df, x="bathrooms", hue="furnishingstatus",palette="rocket")
124
- for i in range(len(df['furnishingstatus'].unique())):
125
- ax.bar_label(ax.containers[i], fontsize=8)
126
- plt.ylabel('count of furnishingstatus')
127
- plt.show()
128
- ax = sns.countplot(df, x="bathrooms", hue="prefarea",palette="crest")
129
- for i in range(len(df['prefarea'].unique())):
130
- ax.bar_label(ax.containers[i], fontsize=8)
131
- plt.ylabel('count of prefarea')
132
- plt.show()
133
- ax = sns.countplot(df, x="bedrooms", hue="prefarea",palette="cubehelix")
134
- for i in range(len(df['prefarea'].unique())):
135
- ax.bar_label(ax.containers[i], fontsize=8)
136
- plt.ylabel('count of prefarea')
137
- plt.show()
138
- ax = sns.countplot(df, x="stories", hue="prefarea",palette="rocket")
139
- for i in range(len(df['prefarea'].unique())):
140
- ax.bar_label(ax.containers[i], fontsize=8)
141
- plt.ylabel('count of prefarea')
142
- plt.show()
143
- ax = sns.countplot(df, x="parking", hue="prefarea",palette="flare")
144
- for i in range(len(df['prefarea'].unique())):
145
- ax.bar_label(ax.containers[i], fontsize=8)
146
- plt.ylabel('count of prefarea')
147
- plt.show()
148
- ax = sns.countplot(df, x="furnishingstatus", hue="prefarea",palette="rocket")
149
- for i in range(len(df['prefarea'].unique())):
150
- ax.bar_label(ax.containers[i], fontsize=8)
151
- plt.ylabel('count of prefarea')
152
- plt.legend(loc = 'best')
153
- plt.show()
154
- ax = sns.countplot(df, x="bathrooms", hue="hotwaterheating",palette="rocket")
155
- for i in range(len(df['hotwaterheating'].unique())):
156
- ax.bar_label(ax.containers[i], fontsize=8)
157
- plt.ylabel('count of hotwaterheating')
158
- plt.legend(loc = 'best')
159
- plt.show()
160
- ax = sns.countplot(df, x="parking", hue="hotwaterheating",palette="rocket")
161
- for i in range(len(df['hotwaterheating'].unique())):
162
- ax.bar_label(ax.containers[i], fontsize=8)
163
- plt.ylabel('count of hotwaterheating')
164
- plt.legend(loc = 'best')
165
- plt.show()
166
- ax = sns.countplot(df, x="bedrooms", hue="hotwaterheating",palette="rocket")
167
- for i in range(len(df['hotwaterheating'].unique())):
168
- ax.bar_label(ax.containers[i], fontsize=8)
169
- plt.ylabel('count of hotwaterheating')
170
- plt.legend(loc = 'best')
171
- plt.show()
172
- ax = sns.countplot(df, x="stories", hue="hotwaterheating",palette="rocket")
173
- for i in range(len(df['hotwaterheating'].unique())):
174
- ax.bar_label(ax.containers[i], fontsize=8)
175
- plt.ylabel('count of hotwaterheating')
176
- plt.legend(loc = 'best')
177
- plt.show()
178
-
179
- ax = sns.countplot(df, x="mainroad", hue="hotwaterheating",palette="rocket")
180
- for i in range(len(df['hotwaterheating'].unique())):
181
- ax.bar_label(ax.containers[i], fontsize=8)
182
- plt.ylabel('count of hotwaterheating')
183
- plt.legend(loc = 'best')
184
- plt.show()
185
- encoder = LabelEncoder()
186
- encoding_col = ['furnishingstatus','prefarea','airconditioning','hotwaterheating','basement','guestroom','mainroad']
187
- for col in encoding_col:
188
- df[col]=encoder.fit_transform(df[col])
189
- df
190
- plt.figure(figsize=(10, 10))
191
- sns.heatmap(df.corr(), annot=True, fmt=".2f", linewidths=0.5, cbar=True)
192
- plt.show()
193
- x=df.drop(columns=['price'],axis = 1)
194
- y=df['price']
195
- scaler = MinMaxScaler()
196
- x = scaler.fit_transform(x)
197
- y = scaler.fit_transform(y.values.reshape(-1, 1))
198
- x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=50)
199
- ln_model = LinearRegression()
200
- ln_model.fit(x_train, y_train)
201
-
202
- y_pred = ln_model.predict(x_test)
203
- ln_acc = r2_score(y_test, y_pred)
204
- ln_acc
205
- y_pred = ln_model.predict(x_test)
206
- ln_acc = r2_score(y_test, y_pred)
207
- ln_acc
208
- dt_model = DecisionTreeRegressor()
209
- dt_model.fit(x_train, y_train)
210
- y_pred = dt_model.predict(x_test)
211
- dt_acc = r2_score(y_test, y_pred)
212
- dt_acc
213
- rf_model = RandomForestRegressor(n_estimators=100)
214
- rf_model.fit(x_train, y_train)
215
- y_pred = rf_model.predict(x_test)
216
- rf_acc = r2_score(y_test, y_pred)
217
- rf_acc
218
  from sklearn.svm import SVR
219
- svr_model = SVR(kernel='linear')
220
- svr_model.fit(x_train, y_train)
221
- y_pred = svr_model.predict(x_test)
222
- svr_acc = r2_score(y_test, y_pred)
223
- svr_acc
224
- from sklearn.ensemble import GradientBoostingRegressor
225
- gb_model = GradientBoostingRegressor()
226
- gb_model.fit(x_train, y_train)
227
- y_pred = gb_model.predict(x_test)
228
- gb_acc = r2_score(y_test, y_pred)
229
- gb_acc
230
- from sklearn.ensemble import AdaBoostRegressor
231
- ada_model = AdaBoostRegressor()
232
- ada_model.fit(x_train, y_train)
233
- y_pred = ada_model.predict(x_test)
234
- ada_acc = r2_score(y_test, y_pred)
235
- ada_acc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
  import pandas as pd
3
+ import numpy as np
4
  import matplotlib.pyplot as plt
5
  import seaborn as sns
6
+ from sklearn.preprocessing import LabelEncoder, MinMaxScaler
 
 
 
 
7
  from sklearn.model_selection import train_test_split
 
8
  from sklearn.linear_model import LinearRegression
9
  from sklearn.tree import DecisionTreeRegressor
10
+ from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  from sklearn.svm import SVR
12
+ from sklearn.metrics import r2_score
13
+
14
+ # Uyarıları gizle
15
+ import warnings
16
+ warnings.filterwarnings("ignore")
17
+
18
# Data loading and preprocessing
@st.cache_data
def load_data():
    """Load ``Housing.csv`` and return it with its categorical columns encoded.

    The CSV is read from the current working directory. A ``date`` column is
    dropped when the file has one. Each of the seven categorical columns
    listed below is replaced by the integer codes that ``LabelEncoder``
    produces for it.

    Returns:
        The preprocessed ``pandas.DataFrame``.
    """
    categorical_columns = (
        'furnishingstatus',
        'prefarea',
        'airconditioning',
        'hotwaterheating',
        'basement',
        'guestroom',
        'mainroad',
    )

    frame = pd.read_csv('Housing.csv')

    # Remove the unneeded column (only if it exists).
    if 'date' in frame.columns:
        frame = frame.drop(columns=['date'])

    # Encoding: every column is fitted independently of the others.
    for name in categorical_columns:
        frame[name] = LabelEncoder().fit_transform(frame[name])

    return frame
34
+
35
+ df = load_data()
36
+
37
# Model training helper
def train_and_evaluate_model(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Args:
        model: Object exposing ``fit(X, y)`` and ``predict(X)``; it is fitted
            in place.
        X_train: Training features passed to ``model.fit``.
        X_test: Test features passed to ``model.predict``.
        y_train: Training targets passed to ``model.fit``.
        y_test: True targets the predictions are compared against.

    Returns:
        The value of ``r2_score(y_test, predictions)``.
    """
    model.fit(X_train, y_train)
    return r2_score(y_test, model.predict(X_test))
43
+
44
# Streamlit interface
#
# Top-level script: renders the sidebar controls, the optional statistics and
# charts, then trains the selected regressor and reports its R² score.


def _show_count_bars(data, column, heading, palette, figsize=None):
    """Render a bar chart of the value counts of ``data[column]``."""
    st.subheader(heading)
    counts = data[column].value_counts()
    fig, ax = plt.subplots(figsize=figsize)
    sns.barplot(x=counts.index, y=counts.values, palette=palette, ax=ax)
    for bar in ax.patches:
        height = bar.get_height()
        # Counts are whole numbers; format them without a fractional part.
        ax.annotate(f'{height:.0f}', (bar.get_x() + bar.get_width() / 2., height),
                    ha='center', va='center', fontsize=8, color='black',
                    xytext=(0, 5), textcoords='offset points')
    st.pyplot(fig)
    # Close the figure so reruns of the script do not accumulate open figures.
    plt.close(fig)


def _show_share_pie(data, column, heading, title, palette):
    """Render a pie chart of the value counts of ``data[column]``."""
    st.subheader(heading)
    counts = data[column].value_counts()
    fig, ax = plt.subplots()
    # Offset every slice except the first; for two categories this is the
    # original [0, 0.09], and it stays valid for any number of categories.
    explode = [0 if position == 0 else 0.09 for position in range(len(counts))]
    _, _, autotexts = ax.pie(counts.values, labels=counts.index, autopct='%.0f%%',
                             explode=explode, colors=sns.color_palette(palette))
    for autotext in autotexts:
        autotext.set_color('black')
    # Use the Axes API rather than pyplot's implicit "current figure" state.
    ax.set_title(title)
    ax.legend(loc="best")
    st.pyplot(fig)
    plt.close(fig)


# Selectable models, in display order. Values are zero-argument factories so
# only the chosen estimator is ever instantiated.
model_factories = {
    "Linear Regression": LinearRegression,
    "Decision Tree": DecisionTreeRegressor,
    "Random Forest": lambda: RandomForestRegressor(n_estimators=100),
    "SVR": lambda: SVR(kernel='linear'),
    "Gradient Boosting": GradientBoostingRegressor,
    "AdaBoost": AdaBoostRegressor,
}

st.title("Ev Fiyat Tahmini Uygulaması")

# Sidebar - model selection
st.sidebar.header("Model Seçimi")
selected_model = st.sidebar.selectbox("Model Seçin", list(model_factories))

# Sidebar - dataset statistics
st.sidebar.header("Veri Seti İstatistikleri")
if st.sidebar.checkbox("İstatistikleri Göster"):
    st.subheader("Veri Seti İstatistikleri")
    st.write(df.describe())

# Sidebar - charts
st.sidebar.header("Grafikler")
if st.sidebar.checkbox("Grafikleri Göster"):
    st.subheader("Grafikler")

    # Count of bedrooms
    _show_count_bars(df, 'bedrooms', "Oda Sayısı Dağılımı", "rocket_r", figsize=(8, 3))

    # Count of bathrooms
    _show_count_bars(df, 'bathrooms', "Banyo Sayısı Dağılımı", "mako")

    # Count of stories
    _show_count_bars(df, 'stories', "Kat Sayısı Dağılımı", "magma")

    # Count of mainroad
    _show_share_pie(df, 'mainroad', "Ana Yola Bağlantı Dağılımı", "Ana Yola Bağlantı", "crest")

    # Count of guestroom
    _show_share_pie(df, 'guestroom', "Misafir Odası Dağılımı", "Misafir Odası", "crest")

    # Count of furnishing status
    _show_count_bars(df, 'furnishingstatus', "Eşya Durumu Dağılımı", "magma")

    # Count of prefarea
    _show_share_pie(df, 'prefarea', "Tercih Edilen Bölge Dağılımı", "Tercih Edilen Bölge", "magma")

    # Correlation heatmap
    st.subheader("Korelasyon Matrisi")
    fig, ax = plt.subplots(figsize=(10, 10))
    sns.heatmap(df.corr(), annot=True, fmt=".2f", linewidths=0.5, cbar=True, ax=ax)
    st.pyplot(fig)
    plt.close(fig)

# Main section - model results
st.header("Model Sonuçları")

# Split first, then scale: the scalers are fitted on the training split only
# so that no information from the test split leaks into training.
X = df.drop(columns=['price'])
y = df['price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=50)

# Separate scaler objects for features and target, so fitting one does not
# overwrite the other.
feature_scaler = MinMaxScaler()
X_train = feature_scaler.fit_transform(X_train)
X_test = feature_scaler.transform(X_test)

# MinMaxScaler needs 2-D input; flatten back to 1-D afterwards because the
# estimators expect a 1-D target.
target_scaler = MinMaxScaler()
y_train = target_scaler.fit_transform(y_train.to_numpy().reshape(-1, 1)).ravel()
y_test = target_scaler.transform(y_test.to_numpy().reshape(-1, 1)).ravel()

# Build the selected model and show its result
model = model_factories[selected_model]()

r2 = train_and_evaluate_model(model, X_train, X_test, y_train, y_test)

st.write(f"{selected_model} R-kare Değeri: {r2:.3f}")