hhhar committed on
Commit
f1cb155
·
verified ·
1 Parent(s): 357b625

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -23
app.py CHANGED
@@ -3,6 +3,8 @@ import joblib
3
  import numpy as np
4
  import os
5
  import pandas as pd
 
 
6
 
7
  # Load the preprocessor
8
  preprocessor_path = 'modelExports/preprocessor.pkl'
@@ -15,43 +17,58 @@ def find_header_row(df, required_columns, max_rows_to_check=5):
15
  return i
16
  return -1
17
 
18
-
19
  def process_uploaded_file(uploaded_file, required_columns):
20
  try:
21
- # Read the first few rows to check for headers
22
- df = pd.read_csv(uploaded_file, nrows=5)
 
 
 
 
 
 
 
 
 
 
23
  header_row = find_header_row(df, required_columns)
24
-
25
  if header_row == -1:
26
  st.error(f"Required columns not found in the first {5} rows.")
27
  return None
28
-
29
  # Re-read the file with the correct header row
30
  uploaded_file.seek(0) # Reset file pointer
31
- df = pd.read_csv(uploaded_file, header=header_row)
32
-
33
- st.write(
34
- f"Uploaded data has {df.shape[0]} rows and {df.shape[1]} columns.")
 
 
 
 
 
 
 
 
35
  return df
36
  except Exception as e:
37
- st.error(f"Error reading the CSV file: {e}")
38
  return None
39
 
40
-
41
  def predict_with_model(model, data, includes_preprocessor):
42
  if includes_preprocessor:
43
  return model.predict(data)
44
  else:
45
  return model.predict(preprocessor.transform(data))
46
 
47
-
48
  def create_model_output(df, predictions, model_name):
49
- output_df = df.copy()
50
  output_df['Prediction'] = predictions
51
  output_df['Churn Probability'] = predictions
 
52
  return output_df
53
 
54
-
55
  # Load models and record whether they include the preprocessor
56
  model_folder = 'modelExports'
57
  models = {}
@@ -212,15 +229,14 @@ if interface == "Single Prediction":
212
  elif interface == "Batch Prediction":
213
  # Batch Prediction Interface
214
  st.header('Batch Prediction')
215
- st.write('Upload a CSV file containing customer data.')
216
 
217
- uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
218
 
219
  if uploaded_file is not None:
220
  # Check if models are selected
221
  if not selected_models:
222
- st.error(
223
- "No models selected for prediction. Please select at least one model in the sidebar.")
224
  st.stop()
225
 
226
  required_columns = [
@@ -258,10 +274,8 @@ elif interface == "Batch Prediction":
258
  includes_preprocessor = models_with_preprocessor[model_name]
259
 
260
  try:
261
- predictions = predict_with_model(
262
- model, df, includes_preprocessor)
263
- model_outputs[model_name] = create_model_output(
264
- df, predictions, model_name)
265
  st.success(f"Predictions completed for {model_name}")
266
  except Exception as e:
267
  st.error(f"Error predicting with model {model_name}: {e}")
@@ -279,7 +293,7 @@ elif interface == "Batch Prediction":
279
  )
280
 
281
  else:
282
- st.info('Awaiting CSV file to be uploaded.')
283
 
284
  # Sidebar information
285
  st.sidebar.write("### Model Information")
 
3
  import numpy as np
4
  import os
5
  import pandas as pd
6
+ import io
7
+ import openpyxl
8
 
9
  # Load the preprocessor
10
  preprocessor_path = 'modelExports/preprocessor.pkl'
 
17
  return i
18
  return -1
19
 
 
20
  def process_uploaded_file(uploaded_file, required_columns):
21
  try:
22
+ file_extension = uploaded_file.name.split('.')[-1].lower()
23
+
24
+ if file_extension == 'csv':
25
+ # Read the first few rows to check for headers
26
+ df = pd.read_csv(uploaded_file, nrows=5)
27
+ elif file_extension in ['xlsx', 'xls']:
28
+ # Read the first few rows of the Excel file
29
+ df = pd.read_excel(uploaded_file, nrows=5, engine='openpyxl')
30
+ else:
31
+ st.error("Unsupported file format. Please upload a CSV or Excel file.")
32
+ return None
33
+
34
  header_row = find_header_row(df, required_columns)
35
+
36
  if header_row == -1:
37
  st.error(f"Required columns not found in the first {5} rows.")
38
  return None
39
+
40
  # Re-read the file with the correct header row
41
  uploaded_file.seek(0) # Reset file pointer
42
+ if file_extension == 'csv':
43
+ df = pd.read_csv(uploaded_file, header=header_row)
44
+ else:
45
+ df = pd.read_excel(uploaded_file, header=header_row, engine='openpyxl')
46
+
47
+ st.write(f"Uploaded data has {df.shape[0]} rows and {df.shape[1]} columns.")
48
+
49
+ # Check for PID column
50
+ if 'PID' not in df.columns:
51
+ st.error("PID column not found in the uploaded file.")
52
+ return None
53
+
54
  return df
55
  except Exception as e:
56
+ st.error(f"Error reading the file: {e}")
57
  return None
58
 
 
59
  def predict_with_model(model, data, includes_preprocessor):
60
  if includes_preprocessor:
61
  return model.predict(data)
62
  else:
63
  return model.predict(preprocessor.transform(data))
64
 
 
65
  def create_model_output(df, predictions, model_name):
66
+ output_df = df[['PID']].copy() # Only keep PID from original dataframe
67
  output_df['Prediction'] = predictions
68
  output_df['Churn Probability'] = predictions
69
+ output_df['Churn'] = output_df['Prediction'].apply(lambda x: 'Yes' if x == 1 else 'No')
70
  return output_df
71
 
 
72
  # Load models and record whether they include the preprocessor
73
  model_folder = 'modelExports'
74
  models = {}
 
229
  elif interface == "Batch Prediction":
230
  # Batch Prediction Interface
231
  st.header('Batch Prediction')
232
+ st.write('Upload a CSV or Excel file containing customer data.')
233
 
234
+ uploaded_file = st.file_uploader("Choose a CSV or Excel file", type=["csv", "xlsx", "xls"])
235
 
236
  if uploaded_file is not None:
237
  # Check if models are selected
238
  if not selected_models:
239
+ st.error("No models selected for prediction. Please select at least one model in the sidebar.")
 
240
  st.stop()
241
 
242
  required_columns = [
 
274
  includes_preprocessor = models_with_preprocessor[model_name]
275
 
276
  try:
277
+ predictions = predict_with_model(model, df[required_columns], includes_preprocessor)
278
+ model_outputs[model_name] = create_model_output(df, predictions, model_name)
 
 
279
  st.success(f"Predictions completed for {model_name}")
280
  except Exception as e:
281
  st.error(f"Error predicting with model {model_name}: {e}")
 
293
  )
294
 
295
  else:
296
+ st.info('Awaiting CSV or Excel file to be uploaded.')
297
 
298
  # Sidebar information
299
  st.sidebar.write("### Model Information")