Spaces:

hhhar
/

ChurnPredUpdated

Sleeping

App Files Files Community

hhhar committed on Oct 5, 2024

Commit

ae41b32

verified ·

1 Parent(s): 2ea9add

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -7

app.py CHANGED Viewed

@@ -9,19 +9,42 @@ import openpyxl
 preprocessor_path = 'modelExports/preprocessor.pkl'
 preprocessor = joblib.load(preprocessor_path)
 def process_uploaded_file(uploaded_file, required_columns):
     try:
         file_extension = uploaded_file.name.split('.')[-1].lower()
         if file_extension == 'csv':
-            df = pd.read_csv(uploaded_file)
         elif file_extension in ['xlsx', 'xls']:
-            df = pd.read_excel(uploaded_file, engine='openpyxl')
         else:
             st.error("Unsupported file format. Please upload a CSV or Excel file.")
             return None
         # Standardize column names to uppercase and strip spaces
         df.columns = df.columns.str.upper().str.strip()
         st.write("DataFrame columns:", df.columns.tolist())
@@ -42,14 +65,12 @@ def process_uploaded_file(uploaded_file, required_columns):
         st.error(f"Error reading the file: {e}")
         return None
 def predict_with_model(model, data, includes_preprocessor):
     if includes_preprocessor:
         return model.predict(data)
     else:
         return model.predict(preprocessor.transform(data))
 def create_model_output(df, predictions, model_name):
     output_df = df[['PID']].copy()  # Keep PID from original dataframe
     output_df['Prediction'] = predictions
@@ -58,7 +79,6 @@ def create_model_output(df, predictions, model_name):
         lambda x: 'Yes' if x == 1 else 'No')
     return output_df
 # Load models and record whether they include the preprocessor
 model_folder = 'modelExports'
 models = {}
@@ -248,6 +268,9 @@ elif interface == "Batch Prediction":
         if df is None:
             st.stop()
         # Convert numerical columns to numeric data types
         numerical_columns = [
             'BILLING_ZIP', 'ACTIVE_SUBSCRIBERS', 'NOT_ACTIVE_SUBSCRIBERS',
@@ -284,7 +307,7 @@ elif interface == "Batch Prediction":
             try:
                 # Prepare data for prediction
-                data_for_prediction = df[required_columns[1:]]  # Exclude 'PID'
                 if not includes_preprocessor:
                     data_for_prediction = preprocessor.transform(data_for_prediction)
@@ -311,7 +334,6 @@ elif interface == "Batch Prediction":
     else:
         st.info('Awaiting CSV or Excel file to be uploaded.')
 # Sidebar information
 st.sidebar.write("### Model Information")
 st.sidebar.write(f"Total models available: {len(models)}")

 # Load the serialized preprocessor once at module import time.
 # predict_with_model() applies it via .transform() for models that do not
 # bundle their own preprocessing step.
 # NOTE(review): joblib.load unpickles the file, so this path must only ever
 # point at a trusted artifact.
 preprocessor_path = 'modelExports/preprocessor.pkl'
 preprocessor = joblib.load(preprocessor_path)
+def find_header_row(df, required_columns, max_rows_to_check=10):
+    required_columns_upper = [col.upper().strip() for col in required_columns]
+    for i in range(min(max_rows_to_check, len(df))):
+        row_values = [str(val).upper().strip() for val in df.iloc[i].values]
+        if all(col in row_values for col in required_columns_upper):
+            return i  # Header row found at row i
+    return -1  # Header row not found
 def process_uploaded_file(uploaded_file, required_columns):
     try:
         file_extension = uploaded_file.name.split('.')[-1].lower()
         if file_extension == 'csv':
+            # Read the first few rows to check for headers
+            df = pd.read_csv(uploaded_file, nrows=10, header=None)
         elif file_extension in ['xlsx', 'xls']:
+            df = pd.read_excel(uploaded_file, nrows=10, header=None, engine='openpyxl')
         else:
             st.error("Unsupported file format. Please upload a CSV or Excel file.")
             return None
+        header_row = find_header_row(df, required_columns)
+        if header_row == -1:
+            st.error(f"Required columns not found in the first {len(df)} rows.")
+            st.write("Expected columns:", required_columns)
+            st.write("Found data rows:", df.head().values.tolist())
+            return None
+        # Re-read the file with the correct header row
+        uploaded_file.seek(0)  # Reset file pointer
+        if file_extension == 'csv':
+            df = pd.read_csv(uploaded_file, header=header_row)
+        else:
+            df = pd.read_excel(uploaded_file, header=header_row, engine='openpyxl')
         # Standardize column names to uppercase and strip spaces
         df.columns = df.columns.str.upper().str.strip()
         st.write("DataFrame columns:", df.columns.tolist())
         st.error(f"Error reading the file: {e}")
         return None
 def predict_with_model(model, data, includes_preprocessor):
     """Run ``model.predict`` on ``data``, transforming it first when required.

     When ``includes_preprocessor`` is false, ``data`` is passed through the
     module-level ``preprocessor.transform`` before prediction; otherwise it
     is handed to the model unchanged.
     """
     features = data if includes_preprocessor else preprocessor.transform(data)
     return model.predict(features)
 def create_model_output(df, predictions, model_name):
     output_df = df[['PID']].copy()  # Keep PID from original dataframe
     output_df['Prediction'] = predictions
         lambda x: 'Yes' if x == 1 else 'No')
     return output_df
 # Load models and record whether they include the preprocessor
 model_folder = 'modelExports'
 models = {}
         if df is None:
             st.stop()
+        # Standardize required columns to uppercase and strip spaces
+        required_columns_upper = [col.upper().strip() for col in required_columns]
         # Convert numerical columns to numeric data types
         numerical_columns = [
             'BILLING_ZIP', 'ACTIVE_SUBSCRIBERS', 'NOT_ACTIVE_SUBSCRIBERS',
             try:
                 # Prepare data for prediction
+                data_for_prediction = df[required_columns_upper[1:]]  # Exclude 'PID'
                 if not includes_preprocessor:
                     data_for_prediction = preprocessor.transform(data_for_prediction)
     else:
         st.info('Awaiting CSV or Excel file to be uploaded.')
 # Sidebar information
 st.sidebar.write("### Model Information")
 st.sidebar.write(f"Total models available: {len(models)}")