Haticece committed on
Commit
2bf4ab3
·
verified ·
1 Parent(s): d465628

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +183 -93
app.py CHANGED
@@ -1,112 +1,202 @@
1
  import streamlit as st
2
  import pandas as pd
3
- import matplotlib.pyplot as plt
4
  import seaborn as sns
 
5
  from sklearn.model_selection import train_test_split
6
  from sklearn.linear_model import LogisticRegression
7
  from sklearn.preprocessing import LabelEncoder, StandardScaler
8
  from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
9
  from io import StringIO
10
 
 
 
11
 
12
- # Load the dataset
13
- @st.cache
14
  def load_data():
15
- return pd.read_csv('churn.csv')
16
-
17
- data = load_data()
18
-
19
- # Sidebar options
20
- st.sidebar.title("Navigation")
21
- options = ["Data Overview", "Visualization", "Churn Prediction"]
22
- choice = st.sidebar.radio("Go to", options)
23
-
24
- if choice == "Data Overview":
25
- st.title("Customer Churn Dataset")
26
- st.write("### Dataset Head")
27
- st.write(data.head())
28
-
29
- st.write("### Dataset Information")
30
- # Create a buffer for the info output
31
- buffer = StringIO()
32
- data.info(buf=buffer)
33
- s = buffer.getvalue() # Get the output from the buffer
34
- st.text(s)
35
-
36
- st.write("### Dataset Statistics")
37
- st.write(data.describe())
38
-
39
- elif choice == "Visualization":
40
- st.title("Data Visualization")
41
-
42
- # Churn distribution
43
- st.write("### Churn Distribution")
44
- fig, ax = plt.subplots()
45
- sns.countplot(data['Churn'], ax=ax)
46
- st.pyplot(fig)
47
-
48
- # Gender distribution by churn
49
- st.write("### Gender Distribution by Churn")
50
- fig, ax = plt.subplots()
51
- sns.countplot(x='gender', hue='Churn', data=data, ax=ax)
52
- st.pyplot(fig)
53
-
54
- elif choice == "Churn Prediction":
55
- st.title("Churn Prediction")
56
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  # Preprocess the data
58
- df = data.copy()
59
- df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
60
- df = df.dropna()
 
 
 
 
 
61
  label_enc = LabelEncoder()
62
- for col in df.select_dtypes(include=['object']).columns:
63
- if col != 'customerID':
64
- df[col] = label_enc.fit_transform(df[col])
65
-
66
- X = df.drop(columns=['Churn', 'customerID'])
67
- y = df['Churn']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
69
 
70
- # Model training
71
  scaler = StandardScaler()
72
  X_train = scaler.fit_transform(X_train)
73
  X_test = scaler.transform(X_test)
74
-
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  model = LogisticRegression()
76
- model.fit(X_train, y_train)
77
-
78
- # User input for prediction
79
- st.write("### Input Customer Data for Prediction")
80
- input_data = {col: st.text_input(col, "0") for col in X.columns}
81
- if st.button("Predict"):
82
- input_df = pd.DataFrame([input_data])
83
- input_df = input_df.astype(float)
84
- scaled_input = scaler.transform(input_df)
85
- prediction = model.predict(scaled_input)
86
- st.write("### Prediction: ", "Churn" if prediction[0] == 1 else "No Churn")
87
-
88
- # Evaluation metrics
89
- st.write("### Model Performance")
90
- y_pred = model.predict(X_test)
91
- st.text(classification_report(y_test, y_pred))
92
-
93
- # Confusion matrix
94
- st.write("### Confusion Matrix")
95
- fig, ax = plt.subplots()
96
- sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d', cmap='Blues', ax=ax)
97
- st.pyplot(fig)
98
-
99
- # ROC Curve
100
- st.write("### ROC Curve")
101
- y_prob = model.predict_proba(X_test)[:, 1] # Probabilities for the positive class
102
- fpr, tpr, thresholds = roc_curve(y_test, y_prob)
103
- roc_auc = auc(fpr, tpr)
104
-
105
- fig, ax = plt.subplots()
106
- ax.plot(fpr, tpr, color='blue', lw=2, label=f"ROC Curve (AUC = {roc_auc:.2f})")
107
- ax.plot([0, 1], [0, 1], color='red', lw=2, linestyle='--')
108
- ax.set_xlabel("False Positive Rate")
109
- ax.set_ylabel("True Positive Rate")
110
- ax.set_title("Receiver Operating Characteristic (ROC) Curve")
111
- ax.legend(loc="lower right")
112
- st.pyplot(fig)
 
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
3
+ import pickle
4
  import seaborn as sns
5
+ import matplotlib.pyplot as plt
6
  from sklearn.model_selection import train_test_split
7
  from sklearn.linear_model import LogisticRegression
8
  from sklearn.preprocessing import LabelEncoder, StandardScaler
9
  from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
10
  from io import StringIO
11
 
12
# Page settings (title, icon, wide layout).
st.set_page_config(
    page_title="Müşteri Kaybı Tahmin Uygulaması",
    page_icon=":telephone_receiver:",
    layout="wide",
)
14
 
15
# Load the data; @st.cache_data is used so the CSV is only loaded once.
@st.cache_data()
def load_data():
    """Read the relative path 'churn.csv' and return it as a DataFrame."""
    return pd.read_csv('churn.csv')
20
+
21
df = load_data()

# --- Interface ---

st.title("Müşteri Kaybı Tahmin Uygulaması")

# --- Sidebar (left menu) ---
st.sidebar.header("Navigasyon")
page = st.sidebar.radio(
    "Sayfa Seçin:",
    ["Veri İnceleme", "Model ve Tahmin"],
)
30
+
31
# --- Page routing: data exploration page vs. model training / prediction page ---
if page == "Veri İnceleme":
    st.header("Veri Seti İnceleme")

    if st.checkbox("Veri Setini Göster"):
        st.subheader("Veri Seti")
        st.dataframe(df)

    if st.checkbox("Özet İstatistikleri Göster"):
        st.subheader("Özet İstatistikler")
        st.write(df.describe())

    if st.checkbox("Sütun Bilgilerini Göster"):
        st.subheader("Sütun Bilgileri")
        # DataFrame.info() writes to a stream instead of returning text,
        # so capture it in an in-memory buffer and show the buffer contents.
        buffer = StringIO()
        df.info(buf=buffer)
        st.text(buffer.getvalue())

    # --- Visualization ---
    st.header("Veri Görselleştirme")
    if st.checkbox("Sayısal Değişken Dağılımları"):
        st.subheader("Sayısal Değişken Dağılımları")
        for col in ['tenure', 'MonthlyCharges', 'TotalCharges']:
            # The model page has to coerce 'TotalCharges' with pd.to_numeric,
            # which implies the raw column is loaded as text; coerce here too
            # so the histogram is drawn over numbers (no-op for numeric columns).
            values = pd.to_numeric(df[col], errors='coerce')
            fig, ax = plt.subplots()
            sns.histplot(values, kde=True, ax=ax)
            st.pyplot(fig)
            # pyplot keeps every figure alive until it is closed; close it so
            # figures do not pile up across Streamlit reruns.
            plt.close(fig)

    if st.checkbox("Kategorik Değişken Dağılımları"):
        st.subheader("Kategorik Değişken Dağılımları")
        for col in ['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines',
                    'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
                    'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod', "Churn"]:
            fig, ax = plt.subplots()
            sns.countplot(x=col, data=df, ax=ax)
            st.pyplot(fig)
            plt.close(fig)

    if st.checkbox("Churn Dağılımı"):
        st.subheader("Churn Dağılımı")
        fig, ax = plt.subplots()
        sns.countplot(x='Churn', data=df, ax=ax)
        st.pyplot(fig)
        plt.close(fig)

    if st.checkbox("Cinsiyete Göre Churn Dağılımı"):
        st.subheader("Cinsiyete Göre Churn Dağılımı")
        fig, ax = plt.subplots()
        sns.countplot(x='gender', hue='Churn', data=df, ax=ax)
        st.pyplot(fig)
        plt.close(fig)

# --- Model and prediction page ---
elif page == "Model ve Tahmin":
    st.header("Müşteri Kaybı Tahmini")

    # Preprocess a copy so `df` keeps the raw category values that the
    # input widgets below offer as options.
    df_model = df.copy()

    # Make TotalCharges numeric and fill the values that could not be parsed
    # with the median. Assign the result back instead of using a chained
    # `inplace=True` fill, which is not guaranteed to write into df_model.
    df_model['TotalCharges'] = pd.to_numeric(df_model['TotalCharges'], errors='coerce')
    df_model['TotalCharges'] = df_model['TotalCharges'].fillna(df_model['TotalCharges'].median())

    # Label-encode the categorical columns. One fitted encoder is kept per
    # column so the user's input can later be encoded with exactly the same
    # category -> integer mapping the model was trained on.
    encoders = {}
    for col in df_model.select_dtypes(include=['object']).columns:
        if col != 'customerID':
            encoder = LabelEncoder()
            df_model[col] = encoder.fit_transform(df_model[col])
            encoders[col] = encoder

    # --- Collect input from the user ---

    def user_input_features():
        """Render the input widgets and return the selections as a one-row DataFrame.

        Categorical values are returned raw (as offered by the widgets); the
        caller is responsible for encoding, ordering and scaling them.
        """
        features = {}
        col1, col2 = st.columns(2)
        with col1:
            features['gender'] = st.selectbox("Cinsiyet", df['gender'].unique())
            features['SeniorCitizen'] = st.selectbox("Yaşlı Mı?", df['SeniorCitizen'].unique())
            features['Partner'] = st.selectbox("Partneri Var Mı?", df['Partner'].unique())
            features['Dependents'] = st.selectbox("Bağımlı Kişi Var Mı?", df['Dependents'].unique())
            features['PhoneService'] = st.selectbox("Telefon Hizmeti Var Mı?", df['PhoneService'].unique())

            # Special case: the MultipleLines options depend on PhoneService.
            if features['PhoneService'] == 'Yes':
                features['MultipleLines'] = st.selectbox("Çoklu Hat Var Mı?", ['Yes', 'No'])
            else:
                features['MultipleLines'] = st.selectbox("Çoklu Hat Var Mı?", ['No phone service'])

            features['OnlineSecurity'] = st.selectbox("Çevrimiçi Güvenlik Var Mı?", df['OnlineSecurity'].unique())
            features['OnlineBackup'] = st.selectbox("Çevrimiçi Yedekleme Var Mı?", df['OnlineBackup'].unique())

        with col2:
            features['DeviceProtection'] = st.selectbox("Cihaz Koruması Var Mı?", df['DeviceProtection'].unique())
            features['TechSupport'] = st.selectbox("Teknik Destek Var Mı?", df['TechSupport'].unique())
            features['StreamingTV'] = st.selectbox("TV Yayını Var Mı?", df['StreamingTV'].unique())
            features['StreamingMovies'] = st.selectbox("Film Yayını Var Mı?", df['StreamingMovies'].unique())
            features['Contract'] = st.selectbox("Sözleşme Türü", df['Contract'].unique())
            features['PaperlessBilling'] = st.selectbox("Kağıtsız Fatura Var Mı?", df['PaperlessBilling'].unique())
            features['PaymentMethod'] = st.selectbox("Ödeme Yöntemi", df['PaymentMethod'].unique())
            features['tenure'] = st.slider("Müşteri Olma Süresi (Ay)", 0, 72, 12)
            features['MonthlyCharges'] = st.slider("Aylık Ücret", 0, 150, 50)
            # Bounds come from the numeric, cleaned column in df_model; the
            # raw df column is not numeric (it needs pd.to_numeric above).
            features['TotalCharges'] = st.slider(
                "Toplam Ücret",
                0,
                int(df_model['TotalCharges'].max()),
                int(df_model['TotalCharges'].median()),
            )

        # InternetService is asked last.
        # NOTE(review): the original indentation was lost, so it is unclear
        # whether this widget belongs inside col2; it is rendered below both
        # columns here — confirm the intended layout.
        features['InternetService'] = st.selectbox("İnternet Servisi", df['InternetService'].unique())
        return pd.DataFrame(features, index=[0])

    input_df = user_input_features()

    # --- Train the model and predict ---

    X = df_model.drop(columns=['Churn', 'customerID'])
    y = df_model['Churn']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Encode the user's categorical choices with the encoders fitted on the
    # training data. LabelEncoder.transform raises on a value it has not seen,
    # which is preferable to silently feeding a wrong code to the model.
    for col, encoder in encoders.items():
        if col in input_df.columns:
            input_df[col] = encoder.transform(input_df[col].astype(str))

    # Put the input columns in the training column order before scaling: the
    # scaler was fitted on X's columns and the widgets collect them in a
    # different order.
    input_df = input_df[X.columns]
    input_scaled = scaler.transform(input_df)

    # Train the model (it is re-fitted here on every script run, not loaded).
    model = LogisticRegression()
    model.fit(X_train, y_train)

    if st.button('Tahmin Yap'):
        prediction = model.predict(input_scaled)
        prediction_proba = model.predict_proba(input_scaled)

        st.subheader("Tahmin Sonucu:")
        if prediction[0] == 0:
            st.success("Bu müşterinin kayıp **OLMAYACAĞI** tahmin ediliyor. :thumbsup:")
        else:
            st.error("Bu müşterinin kayıp **OLACAĞI** tahmin ediliyor. :thumbsdown:")

        st.subheader("Tahmin Olasılıkları:")
        st.write(f"Kayıp Olmama Olasılığı: **{prediction_proba[0][0]:.2f}**")
        st.write(f"Kayıp Olma Olasılığı: **{prediction_proba[0][1]:.2f}**")

    # --- Model performance ---
    # NOTE(review): the original indentation was lost; this section is placed
    # outside the button branch so it is always shown — confirm.
    st.header("Model Performansı")
    y_pred = model.predict(X_test)

    st.subheader("Sınıflandırma Raporu")
    st.text(classification_report(y_test, y_pred))

    st.subheader("Karışıklık Matrisi")
    fig, ax = plt.subplots()
    sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d', cmap='Blues', ax=ax)
    st.pyplot(fig)
    plt.close(fig)

    st.subheader("ROC Eğrisi")
    y_prob = model.predict_proba(X_test)[:, 1]  # probability of the positive class
    fpr, tpr, _ = roc_curve(y_test, y_prob)
    roc_auc = auc(fpr, tpr)

    fig, ax = plt.subplots()
    ax.plot(fpr, tpr, color='blue', lw=2, label=f"ROC Curve (AUC = {roc_auc:.2f})")
    ax.plot([0, 1], [0, 1], color='red', lw=2, linestyle='--')
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")
    ax.set_title("Receiver Operating Characteristic (ROC) Curve")
    ax.legend(loc="lower right")
    st.pyplot(fig)
    plt.close(fig)