hhhar committed on
Commit
ae41b32
·
verified ·
1 Parent(s): 2ea9add

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -7
app.py CHANGED
@@ -9,19 +9,42 @@ import openpyxl
9
  preprocessor_path = 'modelExports/preprocessor.pkl'
10
  preprocessor = joblib.load(preprocessor_path)
11
 
 
 
 
 
 
 
 
12
 
13
  def process_uploaded_file(uploaded_file, required_columns):
14
  try:
15
  file_extension = uploaded_file.name.split('.')[-1].lower()
16
 
17
  if file_extension == 'csv':
18
- df = pd.read_csv(uploaded_file)
 
19
  elif file_extension in ['xlsx', 'xls']:
20
- df = pd.read_excel(uploaded_file, engine='openpyxl')
21
  else:
22
  st.error("Unsupported file format. Please upload a CSV or Excel file.")
23
  return None
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  # Standardize column names to uppercase and strip spaces
26
  df.columns = df.columns.str.upper().str.strip()
27
  st.write("DataFrame columns:", df.columns.tolist())
@@ -42,14 +65,12 @@ def process_uploaded_file(uploaded_file, required_columns):
42
  st.error(f"Error reading the file: {e}")
43
  return None
44
 
45
-
46
  def predict_with_model(model, data, includes_preprocessor):
47
  if includes_preprocessor:
48
  return model.predict(data)
49
  else:
50
  return model.predict(preprocessor.transform(data))
51
 
52
-
53
  def create_model_output(df, predictions, model_name):
54
  output_df = df[['PID']].copy() # Keep PID from original dataframe
55
  output_df['Prediction'] = predictions
@@ -58,7 +79,6 @@ def create_model_output(df, predictions, model_name):
58
  lambda x: 'Yes' if x == 1 else 'No')
59
  return output_df
60
 
61
-
62
  # Load models and record whether they include the preprocessor
63
  model_folder = 'modelExports'
64
  models = {}
@@ -248,6 +268,9 @@ elif interface == "Batch Prediction":
248
  if df is None:
249
  st.stop()
250
 
 
 
 
251
  # Convert numerical columns to numeric data types
252
  numerical_columns = [
253
  'BILLING_ZIP', 'ACTIVE_SUBSCRIBERS', 'NOT_ACTIVE_SUBSCRIBERS',
@@ -284,7 +307,7 @@ elif interface == "Batch Prediction":
284
 
285
  try:
286
  # Prepare data for prediction
287
- data_for_prediction = df[required_columns[1:]] # Exclude 'PID'
288
 
289
  if not includes_preprocessor:
290
  data_for_prediction = preprocessor.transform(data_for_prediction)
@@ -311,7 +334,6 @@ elif interface == "Batch Prediction":
311
  else:
312
  st.info('Awaiting CSV or Excel file to be uploaded.')
313
 
314
-
315
  # Sidebar information
316
  st.sidebar.write("### Model Information")
317
  st.sidebar.write(f"Total models available: {len(models)}")
 
9
  preprocessor_path = 'modelExports/preprocessor.pkl'
10
  preprocessor = joblib.load(preprocessor_path)
11
 
12
def find_header_row(df, required_columns, max_rows_to_check=10):
    """Find the index of the row in ``df`` that contains every required column name.

    Cell values and required names are compared after converting to ``str``,
    upper-casing and stripping surrounding whitespace, so the match is
    case-insensitive and tolerant of padding.

    Args:
        df: DataFrame read without a header (each candidate header is a data row).
        required_columns: Iterable of column names that must all appear in the row.
        max_rows_to_check: Maximum number of leading rows to inspect.

    Returns:
        The positional index of the first matching row, or ``-1`` when no row
        among the inspected ones contains all required names.
    """
    # Normalise once; str() keeps non-string labels from raising on .upper().
    required = {str(col).upper().strip() for col in required_columns}

    rows_to_check = min(max_rows_to_check, len(df))
    for i in range(rows_to_check):
        # A set makes the "all required names present" check a subset test.
        row_values = {str(val).upper().strip() for val in df.iloc[i].values}
        if required <= row_values:
            return i  # Header row found at row i
    return -1  # Header row not found
19
 
20
  def process_uploaded_file(uploaded_file, required_columns):
21
  try:
22
  file_extension = uploaded_file.name.split('.')[-1].lower()
23
 
24
  if file_extension == 'csv':
25
+ # Read the first few rows to check for headers
26
+ df = pd.read_csv(uploaded_file, nrows=10, header=None)
27
  elif file_extension in ['xlsx', 'xls']:
28
+ df = pd.read_excel(uploaded_file, nrows=10, header=None, engine='openpyxl')
29
  else:
30
  st.error("Unsupported file format. Please upload a CSV or Excel file.")
31
  return None
32
 
33
+ header_row = find_header_row(df, required_columns)
34
+
35
+ if header_row == -1:
36
+ st.error(f"Required columns not found in the first {len(df)} rows.")
37
+ st.write("Expected columns:", required_columns)
38
+ st.write("Found data rows:", df.head().values.tolist())
39
+ return None
40
+
41
+ # Re-read the file with the correct header row
42
+ uploaded_file.seek(0) # Reset file pointer
43
+ if file_extension == 'csv':
44
+ df = pd.read_csv(uploaded_file, header=header_row)
45
+ else:
46
+ df = pd.read_excel(uploaded_file, header=header_row, engine='openpyxl')
47
+
48
  # Standardize column names to uppercase and strip spaces
49
  df.columns = df.columns.str.upper().str.strip()
50
  st.write("DataFrame columns:", df.columns.tolist())
 
65
  st.error(f"Error reading the file: {e}")
66
  return None
67
 
 
68
  def predict_with_model(model, data, includes_preprocessor):
69
  if includes_preprocessor:
70
  return model.predict(data)
71
  else:
72
  return model.predict(preprocessor.transform(data))
73
 
 
74
  def create_model_output(df, predictions, model_name):
75
  output_df = df[['PID']].copy() # Keep PID from original dataframe
76
  output_df['Prediction'] = predictions
 
79
  lambda x: 'Yes' if x == 1 else 'No')
80
  return output_df
81
 
 
82
  # Load models and record whether they include the preprocessor
83
  model_folder = 'modelExports'
84
  models = {}
 
268
  if df is None:
269
  st.stop()
270
 
271
+ # Standardize required columns to uppercase and strip spaces
272
+ required_columns_upper = [col.upper().strip() for col in required_columns]
273
+
274
  # Convert numerical columns to numeric data types
275
  numerical_columns = [
276
  'BILLING_ZIP', 'ACTIVE_SUBSCRIBERS', 'NOT_ACTIVE_SUBSCRIBERS',
 
307
 
308
  try:
309
  # Prepare data for prediction
310
+ data_for_prediction = df[required_columns_upper[1:]] # Exclude 'PID'
311
 
312
  if not includes_preprocessor:
313
  data_for_prediction = preprocessor.transform(data_for_prediction)
 
334
  else:
335
  st.info('Awaiting CSV or Excel file to be uploaded.')
336
 
 
337
  # Sidebar information
338
  st.sidebar.write("### Model Information")
339
  st.sidebar.write(f"Total models available: {len(models)}")