hhhar committed on
Commit
2ea9add
·
verified ·
1 Parent(s): 43959e6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -59
app.py CHANGED
@@ -3,62 +3,46 @@ import joblib
3
  import numpy as np
4
  import os
5
  import pandas as pd
6
- import io
7
  import openpyxl
8
- import re
9
 
10
  # Load the preprocessor
11
  preprocessor_path = 'modelExports/preprocessor.pkl'
12
  preprocessor = joblib.load(preprocessor_path)
13
 
14
 
15
- def find_header_row(df, required_columns, max_rows_to_check=10):
16
- for i in range(min(max_rows_to_check, len(df))):
17
- row_values = [str(val).strip() for val in df.iloc[i].values]
18
- if all(col in row_values for col in required_columns):
19
- return i # Header row found at row i
20
- return -1 # Header row not found
21
-
22
-
23
  def process_uploaded_file(uploaded_file, required_columns):
24
  try:
25
  file_extension = uploaded_file.name.split('.')[-1].lower()
26
 
27
  if file_extension == 'csv':
28
- # Read the first few rows to check for headers
29
- df = pd.read_csv(uploaded_file, nrows=10, header=None)
30
  elif file_extension in ['xlsx', 'xls']:
31
- # Read the first few rows of the Excel file
32
- df = pd.read_excel(uploaded_file, nrows=10, header=None, engine='openpyxl')
33
  else:
34
  st.error("Unsupported file format. Please upload a CSV or Excel file.")
35
  return None
36
 
37
- header_row = find_header_row(df, required_columns)
 
 
38
 
39
- if header_row == -1:
40
- st.error(f"Required columns not found in the first {len(df)} rows.")
41
- st.write("Expected columns:", required_columns)
42
- st.write("Found data rows:", df.head().values.tolist())
43
- return None
44
 
45
- # Re-read the file with the correct header row
46
- uploaded_file.seek(0) # Reset file pointer
47
- if file_extension == 'csv':
48
- df = pd.read_csv(uploaded_file, header=header_row)
49
- else:
50
- df = pd.read_excel(uploaded_file, header=header_row, engine='openpyxl')
51
 
52
- st.write("Column names:", df.columns.tolist())
53
 
54
- # Return the DataFrame without modifying column names
55
  return df
56
  except Exception as e:
57
  st.error(f"Error reading the file: {e}")
58
  return None
59
 
60
 
61
-
62
  def predict_with_model(model, data, includes_preprocessor):
63
  if includes_preprocessor:
64
  return model.predict(data)
@@ -145,29 +129,29 @@ if interface == "Single Prediction":
145
  input_data = {}
146
 
147
  # Categorical inputs
148
- input_data['CRM_PID_Value_Segment'] = st.selectbox(
149
- 'CRM_PID_Value_Segment', crm_pid_value_segment_options)
150
- input_data['EffectiveSegment'] = st.selectbox(
151
- 'EffectiveSegment', effective_segment_options)
152
- input_data['KA_name'] = st.selectbox('KA_name', ka_name_options)
153
 
154
  # Numerical inputs
155
- input_data['Billing_ZIP'] = st.number_input(
156
- 'Billing_ZIP', min_value=0, format="%d")
157
- input_data['Active_subscribers'] = st.number_input(
158
- 'Active_subscribers', min_value=0, format="%d")
159
- input_data['Not_Active_subscribers'] = st.number_input(
160
- 'Not_Active_subscribers', min_value=0, format="%d")
161
- input_data['Suspended_subscribers'] = st.number_input(
162
- 'Suspended_subscribers', min_value=0, format="%d")
163
- input_data['Total_SUBs'] = st.number_input(
164
- 'Total_SUBs', min_value=0, format="%d")
165
- input_data['AvgMobileRevenue'] = st.number_input(
166
- 'AvgMobileRevenue', min_value=0.0, format="%.2f")
167
- input_data['AvgFIXRevenue'] = st.number_input(
168
- 'AvgFIXRevenue', min_value=0.0, format="%.2f")
169
- input_data['TotalRevenue'] = st.number_input(
170
- 'TotalRevenue', min_value=0.0, format="%.2f")
171
  input_data['ARPU'] = st.number_input('ARPU', min_value=0.0, format="%.2f")
172
 
173
  # Predict churn
@@ -175,6 +159,9 @@ if interface == "Single Prediction":
175
  # Convert input data to DataFrame
176
  input_df = pd.DataFrame([input_data])
177
 
 
 
 
178
  # Preprocess the data only if needed
179
  input_data_transformed = preprocessor.transform(input_df)
180
 
@@ -261,16 +248,15 @@ elif interface == "Batch Prediction":
261
  if df is None:
262
  st.stop()
263
 
264
- # Standardize column names to match model expectations
265
- df.columns = df.columns.str.strip().str.upper().str.replace(' ', '_')
266
-
267
- st.write("Standardized DataFrame columns:", df.columns.tolist())
 
 
268
 
269
- # Check for missing columns
270
- missing_columns = [col for col in required_columns if col not in df.columns]
271
- if missing_columns:
272
- st.error(f"The following required columns are missing: {missing_columns}")
273
- st.stop()
274
 
275
  # Fill missing values if any
276
  df.fillna({
 
3
  import numpy as np
4
  import os
5
  import pandas as pd
 
6
  import openpyxl
 
7
 
8
  # Load the preprocessor
9
  preprocessor_path = 'modelExports/preprocessor.pkl'
10
  preprocessor = joblib.load(preprocessor_path)
11
 
12
 
 
 
 
 
 
 
 
 
13
  def process_uploaded_file(uploaded_file, required_columns):
14
  try:
15
  file_extension = uploaded_file.name.split('.')[-1].lower()
16
 
17
  if file_extension == 'csv':
18
+ df = pd.read_csv(uploaded_file)
 
19
  elif file_extension in ['xlsx', 'xls']:
20
+ df = pd.read_excel(uploaded_file, engine='openpyxl')
 
21
  else:
22
  st.error("Unsupported file format. Please upload a CSV or Excel file.")
23
  return None
24
 
25
+ # Standardize column names to uppercase and strip spaces
26
+ df.columns = df.columns.str.upper().str.strip()
27
+ st.write("DataFrame columns:", df.columns.tolist())
28
 
29
+ # Standardize required columns to uppercase and strip spaces
30
+ required_columns_upper = [col.upper().strip() for col in required_columns]
 
 
 
31
 
32
+ # Check if all required columns are present
33
+ missing_columns = [col for col in required_columns_upper if col not in df.columns]
34
+ if missing_columns:
35
+ st.error(f"The following required columns are missing: {missing_columns}")
36
+ return None
 
37
 
38
+ st.write(f"Uploaded data has {df.shape[0]} rows and {df.shape[1]} columns.")
39
 
 
40
  return df
41
  except Exception as e:
42
  st.error(f"Error reading the file: {e}")
43
  return None
44
 
45
 
 
46
  def predict_with_model(model, data, includes_preprocessor):
47
  if includes_preprocessor:
48
  return model.predict(data)
 
129
  input_data = {}
130
 
131
  # Categorical inputs
132
+ input_data['CRM_PID_VALUE_SEGMENT'] = st.selectbox(
133
+ 'CRM_PID_VALUE_SEGMENT', crm_pid_value_segment_options)
134
+ input_data['EFFECTIVESEGMENT'] = st.selectbox(
135
+ 'EFFECTIVESEGMENT', effective_segment_options)
136
+ input_data['KA_NAME'] = st.selectbox('KA_NAME', ka_name_options)
137
 
138
  # Numerical inputs
139
+ input_data['BILLING_ZIP'] = st.number_input(
140
+ 'BILLING_ZIP', min_value=0, format="%d")
141
+ input_data['ACTIVE_SUBSCRIBERS'] = st.number_input(
142
+ 'ACTIVE_SUBSCRIBERS', min_value=0, format="%d")
143
+ input_data['NOT_ACTIVE_SUBSCRIBERS'] = st.number_input(
144
+ 'NOT_ACTIVE_SUBSCRIBERS', min_value=0, format="%d")
145
+ input_data['SUSPENDED_SUBSCRIBERS'] = st.number_input(
146
+ 'SUSPENDED_SUBSCRIBERS', min_value=0, format="%d")
147
+ input_data['TOTAL_SUBS'] = st.number_input(
148
+ 'TOTAL_SUBS', min_value=0, format="%d")
149
+ input_data['AVGMOBILEREVENUE'] = st.number_input(
150
+ 'AVGMOBILEREVENUE', min_value=0.0, format="%.2f")
151
+ input_data['AVGFIXREVENUE'] = st.number_input(
152
+ 'AVGFIXREVENUE', min_value=0.0, format="%.2f")
153
+ input_data['TOTALREVENUE'] = st.number_input(
154
+ 'TOTALREVENUE', min_value=0.0, format="%.2f")
155
  input_data['ARPU'] = st.number_input('ARPU', min_value=0.0, format="%.2f")
156
 
157
  # Predict churn
 
159
  # Convert input data to DataFrame
160
  input_df = pd.DataFrame([input_data])
161
 
162
+ # Standardize column names to uppercase
163
+ input_df.columns = input_df.columns.str.upper().str.strip()
164
+
165
  # Preprocess the data only if needed
166
  input_data_transformed = preprocessor.transform(input_df)
167
 
 
248
  if df is None:
249
  st.stop()
250
 
251
+ # Convert numerical columns to numeric data types
252
+ numerical_columns = [
253
+ 'BILLING_ZIP', 'ACTIVE_SUBSCRIBERS', 'NOT_ACTIVE_SUBSCRIBERS',
254
+ 'SUSPENDED_SUBSCRIBERS', 'TOTAL_SUBS', 'AVGMOBILEREVENUE',
255
+ 'AVGFIXREVENUE', 'TOTALREVENUE', 'ARPU'
256
+ ]
257
 
258
+ for col in numerical_columns:
259
+ df[col] = pd.to_numeric(df[col], errors='coerce')
 
 
 
260
 
261
  # Fill missing values if any
262
  df.fillna({