"""Streamlit app: explore the Housing data set and compare regression models.

The script loads ``Housing.csv``, optionally shows descriptive statistics and
a set of distribution charts, then trains the regressor picked in the sidebar
and reports its R^2 score on a held-out test split.
"""

import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor

# Hide library warnings so they do not clutter the app output.
warnings.filterwarnings("ignore")


# Data loading and preprocessing
@st.cache_data
def load_data():
    """Read ``Housing.csv`` and integer-encode its categorical columns.

    Returns:
        The loaded DataFrame with the ``date`` column removed (when present)
        and each column listed in ``encoding_col`` replaced by the integer
        codes produced by ``LabelEncoder``.
    """
    df = pd.read_csv('Housing.csv')

    # Drop the unneeded column (if it exists).
    if 'date' in df.columns:
        df = df.drop('date', axis=1)

    # Encoding
    encoding_col = ['furnishingstatus', 'prefarea', 'airconditioning',
                    'hotwaterheating', 'basement', 'guestroom', 'mainroad']
    encoder = LabelEncoder()
    for col in encoding_col:
        # fit_transform refits the encoder, so one instance can serve all columns.
        df[col] = encoder.fit_transform(df[col])
    return df


df = load_data()


# Model training helper
def train_and_evaluate_model(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and return its R^2 on the test split.

    Args:
        model: Estimator exposing ``fit`` and ``predict``.
        X_train: Training features.
        X_test: Test features.
        y_train: Training targets.
        y_test: Test targets.

    Returns:
        The ``r2_score`` of the model's predictions for ``X_test`` against
        ``y_test``.
    """
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    return r2_score(y_test, y_pred)


def _show_count_bar(data, column, heading, palette, figsize=None):
    """Render an annotated bar chart of the value counts of ``data[column]``."""
    st.subheader(heading)
    counts = data[column].value_counts()
    fig, ax = plt.subplots(figsize=figsize)
    sns.barplot(x=counts.index, y=counts.values, palette=palette, ax=ax)
    for bar in ax.patches:
        # Bar heights are counts, so label them as whole numbers.
        ax.annotate(f'{bar.get_height():.0f}',
                    (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                    ha='center', va='center', fontsize=8, color='black',
                    xytext=(0, 5), textcoords='offset points')
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; without this the
    # figures pile up on each Streamlit rerun.
    plt.close(fig)


def _show_share_pie(data, column, heading, title, palette):
    """Render a pie chart of the value counts of ``data[column]``."""
    st.subheader(heading)
    counts = data[column].value_counts()
    fig, ax = plt.subplots()
    # Offset every slice except the first; yields [0, 0.09] for two categories.
    explode = [0 if i == 0 else 0.09 for i in range(len(counts))]
    colors = sns.color_palette(palette)
    _, _, autotexts = ax.pie(counts.values, labels=counts.index,
                             autopct='%.0f%%', explode=explode, colors=colors)
    for autotext in autotexts:
        autotext.set_color('black')
    ax.set_title(title)
    ax.legend(loc="best")
    st.pyplot(fig)
    plt.close(fig)


# Maps each selectable model name to a zero-argument factory. The sidebar
# options are taken from this table so the two can never drift apart.
_MODEL_FACTORIES = {
    "Linear Regression": LinearRegression,
    "Decision Tree": DecisionTreeRegressor,
    "Random Forest": lambda: RandomForestRegressor(n_estimators=100),
    "SVR": lambda: SVR(kernel='linear'),
    "Gradient Boosting": GradientBoostingRegressor,
    "AdaBoost": AdaBoostRegressor,
}

# Streamlit interface
st.title("Ev Fiyat Tahmini Uygulaması")

# Sidebar - model selection
st.sidebar.header("Model Seçimi")
selected_model = st.sidebar.selectbox("Model Seçin", list(_MODEL_FACTORIES))

# Sidebar - data set statistics
st.sidebar.header("Veri Seti İstatistikleri")
if st.sidebar.checkbox("İstatistikleri Göster"):
    st.subheader("Veri Seti İstatistikleri")
    st.write(df.describe())

# Sidebar - charts
st.sidebar.header("Grafikler")
if st.sidebar.checkbox("Grafikleri Göster"):
    st.subheader("Grafikler")

    # Count of bedrooms
    _show_count_bar(df, 'bedrooms', "Oda Sayısı Dağılımı", "rocket_r", figsize=(8, 3))

    # Count of bathrooms
    _show_count_bar(df, 'bathrooms', "Banyo Sayısı Dağılımı", "mako")

    # Count of stories
    _show_count_bar(df, 'stories', "Kat Sayısı Dağılımı", "magma")

    # Share of main-road access
    _show_share_pie(df, 'mainroad', "Ana Yola Bağlantı Dağılımı", "Ana Yola Bağlantı", "crest")

    # Share of guest rooms
    _show_share_pie(df, 'guestroom', "Misafir Odası Dağılımı", "Misafir Odası", "crest")

    # Count of furnishing status
    _show_count_bar(df, 'furnishingstatus', "Eşya Durumu Dağılımı", "magma")

    # Share of preferred area
    _show_share_pie(df, 'prefarea', "Tercih Edilen Bölge Dağılımı", "Tercih Edilen Bölge", "magma")

    # Correlation heatmap
    st.subheader("Korelasyon Matrisi")
    fig, ax = plt.subplots(figsize=(10, 10))
    sns.heatmap(df.corr(), annot=True, fmt=".2f", linewidths=0.5, cbar=True, ax=ax)
    st.pyplot(fig)
    plt.close(fig)

# Main section - model results and prediction
st.header("Model Sonuçları")

# Split first, then scale: fitting the scalers on the training rows only keeps
# test-set statistics from leaking into training.
X = df.drop(columns=['price'])
y = df['price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=50)

# Features and target get their own scaler so neither overwrites the other.
feature_scaler = MinMaxScaler()
X_train = feature_scaler.fit_transform(X_train)
X_test = feature_scaler.transform(X_test)

target_scaler = MinMaxScaler()
# MinMaxScaler needs a 2-D input; ravel afterwards because the regressors
# expect a 1-D target.
y_train = target_scaler.fit_transform(y_train.to_numpy().reshape(-1, 1)).ravel()
y_test = target_scaler.transform(y_test.to_numpy().reshape(-1, 1)).ravel()

# Build the selected model and show its score
model = _MODEL_FACTORIES[selected_model]()
r2 = train_and_evaluate_model(model, X_train, X_test, y_train, y_test)
st.write(f"{selected_model} R-kare Değeri: {r2:.3f}")