Haticece committed on
Commit
94f3bdd
·
verified ·
1 Parent(s): 8c93418

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +232 -118
app.py CHANGED
@@ -1,121 +1,235 @@
1
- import streamlit as st
2
- import pandas as pd
3
  import numpy as np
4
- from sklearn.model_selection import train_test_split
5
- from sklearn.preprocessing import StandardScaler
6
- from sklearn.linear_model import LinearRegression
7
-
8
- # Veri Yükleme ve Ön İşleme (Kaggle Notebook'tan uyarlanmıştır)
9
- @st.cache_data
10
- def load_and_preprocess_data():
11
- data = pd.read_csv('Housing.csv')
12
-
13
- # Gereksiz sütunu sil (eğer varsa)
14
- if 'date' in data.columns:
15
- data = data.drop('date', axis=1)
16
-
17
- # Aykırı değerleri işle
18
- data = data[data['bedrooms'] != 33]
19
-
20
- # Saçma değerleri düzelt
21
- data.loc[data['bathrooms'] == 0, 'bathrooms'] = 1
22
- data.loc[data['bedrooms'] == 0, 'bedrooms'] = 1
23
-
24
- # Kategorik sütunlar için binary encoding
25
- binary_columns = ['waterfront', 'view', 'condition']
26
- def binary_encode(df, column, positive_value):
27
- df[column] = df[column].apply(lambda x: 1 if x == positive_value else 0)
28
- for col in binary_columns:
29
- binary_encode(data, col, data[col].max())
30
-
31
- # Log dönüşümü
32
- data['sqft_living'] = np.log(data['sqft_living'])
33
- data['sqft_lot'] = np.log(data['sqft_lot'])
34
- data['sqft_above'] = np.log(data['sqft_above'])
35
- data.loc[data['sqft_basement'] != 0, 'sqft_basement'] = np.log(data.loc[data['sqft_basement'] != 0, 'sqft_basement'])
36
-
37
- # Normalleştirme
38
- scaler = StandardScaler()
39
- numerical_cols = ['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'sqft_above', 'sqft_basement']
40
- data[numerical_cols] = scaler.fit_transform(data[numerical_cols])
41
-
42
- return data
43
-
44
- data = load_and_preprocess_data()
45
-
46
- # Model Eğitimi (Kaggle Notebook'tan uyarlanmıştır)
47
- @st.cache_data
48
- def train_model(data):
49
- X = data.drop('price', axis=1)
50
- y = data['price']
51
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=7)
52
- model = LinearRegression()
53
- model.fit(X_train, y_train)
54
- return model, X_test, y_test
55
-
56
- model, X_test, y_test = train_model(data)
57
-
58
- # Streamlit Arayüzü
59
- st.title("Ev Fiyatı Tahmin Uygulaması")
60
-
61
- # Kenar Çubuğu Filtreleri
62
- st.sidebar.header("Filtreler")
63
-
64
- # Oda Sayısı
65
- oda_sayilari = sorted(data['bedrooms'].unique())
66
- secilen_oda_sayilari = st.sidebar.multiselect('Oda Sayısı', oda_sayilari, oda_sayilari)
67
-
68
- # Banyo Sayısı
69
- banyo_sayilari = sorted(data['bathrooms'].unique())
70
- secilen_banyo_sayilari = st.sidebar.multiselect('Banyo Sayısı', banyo_sayilari, banyo_sayilari)
71
-
72
- # Kat Sayısı
73
- kat_sayilari = sorted(data['floors'].unique())
74
- secilen_kat_sayilari = st.sidebar.multiselect('Kat Sayısı', kat_sayilari, kat_sayilari)
75
-
76
- # Manzara
77
- manzara_secenekleri = sorted(data['view'].unique())
78
- secilen_manzara = st.sidebar.multiselect('Manzara (0-4 arası)', manzara_secenekleri, manzara_secenekleri)
79
-
80
- # Durum
81
- durum_secenekleri = sorted(data['condition'].unique())
82
- secilen_durum = st.sidebar.multiselect('Durum (1-5 arası)', durum_secenekleri, durum_secenekleri)
83
-
84
- # Yaşam alanı
85
- min_living = int(data['sqft_living'].min())
86
- max_living = int(data['sqft_living'].max())
87
- living_range = st.sidebar.slider("Yaşam Alanı (log-dönüştürülmüş)", min_living, max_living, (min_living, max_living))
88
-
89
- # Filtrelenmiş Veri
90
- filtered_data = data[
91
- (data['bedrooms'].isin(secilen_oda_sayilari)) &
92
- (data['bathrooms'].isin(secilen_banyo_sayilari)) &
93
- (data['floors'].isin(secilen_kat_sayilari)) &
94
- (data['view'].isin(secilen_manzara)) &
95
- (data['condition'].isin(secilen_durum)) &
96
- (data['sqft_living'] >= living_range[0]) &
97
- (data['sqft_living'] <= living_range[1])
98
- ]
99
-
100
- # Sonuçları Gösterme
101
- st.write(f"Seçimlerinize uyan {len(filtered_data)} ev bulundu.")
102
-
103
- if not filtered_data.empty:
104
- st.subheader("Fiyat İstatistikleri")
105
- st.write(f"Ortalama Fiyat: ${filtered_data['price'].mean():,.2f}")
106
- st.write(f"Minimum Fiyat: ${filtered_data['price'].min():,.2f}")
107
- st.write(f"Maksimum Fiyat: ${filtered_data['price'].max():,.2f}")
108
- st.write(f"Medyan Fiyat: ${filtered_data['price'].median():,.2f}")
109
- st.write(f"Standart Sapma: ${filtered_data['price'].std():,.2f}")
110
-
111
- st.subheader("Seçilen Evler")
112
- st.dataframe(filtered_data)
113
 
114
- # Model Performansı (Test verisi üzerinde)
115
- st.subheader("Model Performansı (R-kare)")
116
- y_pred = model.predict(X_test)
117
- r2 = r2_score(y_test, y_pred)
118
- st.write(f"R-kare: {r2:.3f}")
119
 
120
- else:
121
- st.write("Seçimlerinize uyan ev bulunamadı.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import numpy as np
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+ import warnings
6
+ warnings.filterwarnings("ignore")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
+ from sklearn.preprocessing import LabelEncoder
9
+ from sklearn.preprocessing import StandardScaler,MinMaxScaler
10
+ from sklearn.model_selection import train_test_split
 
 
11
 
12
+ from sklearn.linear_model import LinearRegression
13
+ from sklearn.tree import DecisionTreeRegressor
14
+ from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor,AdaBoostRegressor
15
+
16
+ from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score,accuracy_score
17
+ df = pd.read_csv('/Users/haticecakir/Downloads/Housing.csv')
18
+ df.sample(15)
19
+ df.info()
20
+ df.describe()
21
+ df.isnull().sum()
22
+ df.duplicated().sum()
23
+ bedrooms_count = df['bedrooms'].value_counts()
24
+ bedrooms_count
25
+ plt.figure(figsize=(8,3))
26
+ ax=sns.barplot(x=bedrooms_count.index, y=bedrooms_count.values,palette="rocket_r")
27
+ ax.bar_label(ax.containers[0], fontsize=8);
28
+ plt.title('count of bedrooms')
29
+ plt.xlabel('bedrooms')
30
+ plt.ylabel('count')
31
+ plt.show()
32
+ count_bathrooms = df['bathrooms'].value_counts()
33
+ count_bathrooms
34
+ ax = sns.barplot(x=count_bathrooms.index,y=count_bathrooms.values,palette="mako")
35
+ ax.bar_label(ax.containers[0], fontsize=8);
36
+ plt.title('count of bathrooms')
37
+ plt.xlabel('bathrooms')
38
+ plt.ylabel('count')
39
+ plt.show()
40
+ stories_count = df['stories'].value_counts()
41
+ stories_count
42
+ ax = sns.barplot(x=stories_count.index,y=stories_count.values,palette="magma")
43
+ ax.bar_label(ax.containers[0], fontsize=8)
44
+ plt.title('count of stories')
45
+ plt.xlabel('stories')
46
+ plt.ylabel('count')
47
+ plt.show()
48
+ count_mainroad=df['mainroad'].value_counts()
49
+ count_mainroad
50
+ explode = [0, 0.09]
51
+ colors = sns.color_palette("crest")
52
+ plt.pie(count_mainroad.values,
53
+ labels=count_mainroad.index,
54
+ autopct='%.0f%%',explode=explode,
55
+ colors = colors)
56
+ plt.title("count of mainroad")
57
+ plt.legend(loc = "best")
58
+
59
+ plt.show()
60
+ guestroom_count = df['guestroom'].value_counts()
61
+ guestroom_count
62
+ explode = [0, 0.09]
63
+ colors = sns.color_palette("crest")
64
+ plt.pie(guestroom_count.values,
65
+ labels=guestroom_count.index,
66
+ autopct='%.0f%%',explode=explode,
67
+ colors = colors)
68
+ plt.title("count of guestroom")
69
+ plt.legend(loc = "best")
70
+
71
+ plt.show()
72
+ furnishingstatus_count = df.furnishingstatus.value_counts()
73
+ furnishingstatus_count
74
+ ax = sns.barplot(x=furnishingstatus_count.index,
75
+ y=furnishingstatus_count.values,
76
+ palette="magma"
77
+
78
+ )
79
+ ax.bar_label(ax.containers[0], fontsize=8)
80
+ plt.show()
81
+ prefarea_count = df.prefarea.value_counts()
82
+ prefarea_count
83
+ explode = [0, 0.09]
84
+ colors = sns.color_palette("magma")
85
+ plt.pie(prefarea_count.values,
86
+ labels=prefarea_count.index,
87
+ autopct='%.0f%%',explode=explode,
88
+ colors = colors)
89
+ plt.title("count of guestroom")
90
+ plt.legend(loc = "best")
91
+
92
+ plt.show()
93
+ ax = sns.countplot(df, x="bedrooms", hue="parking",palette="magma")
94
+ for i in range(len(df['parking'].unique())):
95
+ ax.bar_label(ax.containers[i], fontsize=8)
96
+
97
+
98
+ plt.show()
99
+ ax = sns.countplot(df, x="bedrooms", hue="bathrooms",palette="mako")
100
+ for i in range(len(df['bathrooms'].unique())):
101
+ ax.bar_label(ax.containers[i], fontsize=8)
102
+ plt.show()
103
+ ax = sns.countplot(df, x="bedrooms", hue="stories",palette="mako")
104
+ for i in range(len(df['stories'].unique())):
105
+ ax.bar_label(ax.containers[i], fontsize=8)
106
+ plt.ylabel('count of stoies')
107
+ plt.show()
108
+ ax = sns.countplot(df, x="bedrooms", hue="furnishingstatus",palette="viridis")
109
+ for i in range(len(df['furnishingstatus'].unique())):
110
+ ax.bar_label(ax.containers[i], fontsize=8)
111
+ plt.ylabel('count of furnishingstatus')
112
+ plt.show()
113
+ ax = sns.countplot(df, x="parking", hue="furnishingstatus",palette="rocket_r")
114
+ for i in range(len(df['furnishingstatus'].unique())):
115
+ ax.bar_label(ax.containers[i], fontsize=8)
116
+ plt.ylabel('count of furnishingstatus')
117
+ plt.show()
118
+ ax = sns.countplot(df, x="stories", hue="furnishingstatus",palette="cubehelix")
119
+ for i in range(len(df['furnishingstatus'].unique())):
120
+ ax.bar_label(ax.containers[i], fontsize=8)
121
+ plt.ylabel('count of furnishingstatus')
122
+ plt.show()
123
+ ax = sns.countplot(df, x="bathrooms", hue="furnishingstatus",palette="rocket")
124
+ for i in range(len(df['furnishingstatus'].unique())):
125
+ ax.bar_label(ax.containers[i], fontsize=8)
126
+ plt.ylabel('count of furnishingstatus')
127
+ plt.show()
128
+ ax = sns.countplot(df, x="bathrooms", hue="prefarea",palette="crest")
129
+ for i in range(len(df['prefarea'].unique())):
130
+ ax.bar_label(ax.containers[i], fontsize=8)
131
+ plt.ylabel('count of prefarea')
132
+ plt.show()
133
+ ax = sns.countplot(df, x="bedrooms", hue="prefarea",palette="cubehelix")
134
+ for i in range(len(df['prefarea'].unique())):
135
+ ax.bar_label(ax.containers[i], fontsize=8)
136
+ plt.ylabel('count of prefarea')
137
+ plt.show()
138
+ ax = sns.countplot(df, x="stories", hue="prefarea",palette="rocket")
139
+ for i in range(len(df['prefarea'].unique())):
140
+ ax.bar_label(ax.containers[i], fontsize=8)
141
+ plt.ylabel('count of prefarea')
142
+ plt.show()
143
+ ax = sns.countplot(df, x="parking", hue="prefarea",palette="flare")
144
+ for i in range(len(df['prefarea'].unique())):
145
+ ax.bar_label(ax.containers[i], fontsize=8)
146
+ plt.ylabel('count of prefarea')
147
+ plt.show()
148
+ ax = sns.countplot(df, x="furnishingstatus", hue="prefarea",palette="rocket")
149
+ for i in range(len(df['prefarea'].unique())):
150
+ ax.bar_label(ax.containers[i], fontsize=8)
151
+ plt.ylabel('count of prefarea')
152
+ plt.legend(loc = 'best')
153
+ plt.show()
154
+ ax = sns.countplot(df, x="bathrooms", hue="hotwaterheating",palette="rocket")
155
+ for i in range(len(df['hotwaterheating'].unique())):
156
+ ax.bar_label(ax.containers[i], fontsize=8)
157
+ plt.ylabel('count of hotwaterheating')
158
+ plt.legend(loc = 'best')
159
+ plt.show()
160
+ ax = sns.countplot(df, x="parking", hue="hotwaterheating",palette="rocket")
161
+ for i in range(len(df['hotwaterheating'].unique())):
162
+ ax.bar_label(ax.containers[i], fontsize=8)
163
+ plt.ylabel('count of hotwaterheating')
164
+ plt.legend(loc = 'best')
165
+ plt.show()
166
+ ax = sns.countplot(df, x="bedrooms", hue="hotwaterheating",palette="rocket")
167
+ for i in range(len(df['hotwaterheating'].unique())):
168
+ ax.bar_label(ax.containers[i], fontsize=8)
169
+ plt.ylabel('count of hotwaterheating')
170
+ plt.legend(loc = 'best')
171
+ plt.show()
172
+ ax = sns.countplot(df, x="stories", hue="hotwaterheating",palette="rocket")
173
+ for i in range(len(df['hotwaterheating'].unique())):
174
+ ax.bar_label(ax.containers[i], fontsize=8)
175
+ plt.ylabel('count of hotwaterheating')
176
+ plt.legend(loc = 'best')
177
+ plt.show()
178
+
179
+ ax = sns.countplot(df, x="mainroad", hue="hotwaterheating",palette="rocket")
180
+ for i in range(len(df['hotwaterheating'].unique())):
181
+ ax.bar_label(ax.containers[i], fontsize=8)
182
+ plt.ylabel('count of hotwaterheating')
183
+ plt.legend(loc = 'best')
184
+ plt.show()
185
+ encoder = LabelEncoder()
186
+ encoding_col = ['furnishingstatus','prefarea','airconditioning','hotwaterheating','basement','guestroom','mainroad']
187
+ for col in encoding_col:
188
+ df[col]=encoder.fit_transform(df[col])
189
+ df
190
+ plt.figure(figsize=(10, 10))
191
+ sns.heatmap(df.corr(), annot=True, fmt=".2f", linewidths=0.5, cbar=True)
192
+ plt.show()
193
+ x=df.drop(columns=['price'],axis = 1)
194
+ y=df['price']
195
+ scaler = MinMaxScaler()
196
+ x = scaler.fit_transform(x)
197
+ y = scaler.fit_transform(y.values.reshape(-1, 1))
198
+ x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=50)
199
+ ln_model = LinearRegression()
200
+ ln_model.fit(x_train, y_train)
201
+
202
+ y_pred = ln_model.predict(x_test)
203
+ ln_acc = r2_score(y_test, y_pred)
204
+ ln_acc
205
+ y_pred = ln_model.predict(x_test)
206
+ ln_acc = r2_score(y_test, y_pred)
207
+ ln_acc
208
+ dt_model = DecisionTreeRegressor()
209
+ dt_model.fit(x_train, y_train)
210
+ y_pred = dt_model.predict(x_test)
211
+ dt_acc = r2_score(y_test, y_pred)
212
+ dt_acc
213
+ rf_model = RandomForestRegressor(n_estimators=100)
214
+ rf_model.fit(x_train, y_train)
215
+ y_pred = rf_model.predict(x_test)
216
+ rf_acc = r2_score(y_test, y_pred)
217
+ rf_acc
218
+ from sklearn.svm import SVR
219
+ svr_model = SVR(kernel='linear')
220
+ svr_model.fit(x_train, y_train)
221
+ y_pred = svr_model.predict(x_test)
222
+ svr_acc = r2_score(y_test, y_pred)
223
+ svr_acc
224
+ from sklearn.ensemble import GradientBoostingRegressor
225
+ gb_model = GradientBoostingRegressor()
226
+ gb_model.fit(x_train, y_train)
227
+ y_pred = gb_model.predict(x_test)
228
+ gb_acc = r2_score(y_test, y_pred)
229
+ gb_acc
230
+ from sklearn.ensemble import AdaBoostRegressor
231
+ ada_model = AdaBoostRegressor()
232
+ ada_model.fit(x_train, y_train)
233
+ y_pred = ada_model.predict(x_test)
234
+ ada_acc = r2_score(y_test, y_pred)
235
+ ada_acc