Spaces:

hhhar
/

ChurnPredUpdated

Sleeping

App Files Community

hhhar committed on Oct 5, 2024

Commit

f1cb155

verified ·

1 Parent(s): 357b625

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -23

app.py CHANGED Viewed

@@ -3,6 +3,8 @@ import joblib
 import numpy as np
 import os
 import pandas as pd
 # Load the preprocessor
 preprocessor_path = 'modelExports/preprocessor.pkl'
@@ -15,43 +17,58 @@ def find_header_row(df, required_columns, max_rows_to_check=5):
             return i
     return -1
 def process_uploaded_file(uploaded_file, required_columns):
     try:
-        # Read the first few rows to check for headers
-        df = pd.read_csv(uploaded_file, nrows=5)
         header_row = find_header_row(df, required_columns)
         if header_row == -1:
             st.error(f"Required columns not found in the first {5} rows.")
             return None
         # Re-read the file with the correct header row
         uploaded_file.seek(0)  # Reset file pointer
-        df = pd.read_csv(uploaded_file, header=header_row)
-        st.write(
-            f"Uploaded data has {df.shape[0]} rows and {df.shape[1]} columns.")
         return df
     except Exception as e:
-        st.error(f"Error reading the CSV file: {e}")
         return None
 def predict_with_model(model, data, includes_preprocessor):
     if includes_preprocessor:
         return model.predict(data)
     else:
         return model.predict(preprocessor.transform(data))
 def create_model_output(df, predictions, model_name):
-    output_df = df.copy()
     output_df['Prediction'] = predictions
     output_df['Churn Probability'] = predictions
     return output_df
 # Load models and record whether they include the preprocessor
 model_folder = 'modelExports'
 models = {}
@@ -212,15 +229,14 @@ if interface == "Single Prediction":
 elif interface == "Batch Prediction":
     # Batch Prediction Interface
     st.header('Batch Prediction')
-    st.write('Upload a CSV file containing customer data.')
-    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
     if uploaded_file is not None:
         # Check if models are selected
         if not selected_models:
-            st.error(
-                "No models selected for prediction. Please select at least one model in the sidebar.")
             st.stop()
         required_columns = [
@@ -258,10 +274,8 @@ elif interface == "Batch Prediction":
             includes_preprocessor = models_with_preprocessor[model_name]
             try:
-                predictions = predict_with_model(
-                    model, df, includes_preprocessor)
-                model_outputs[model_name] = create_model_output(
-                    df, predictions, model_name)
                 st.success(f"Predictions completed for {model_name}")
             except Exception as e:
                 st.error(f"Error predicting with model {model_name}: {e}")
@@ -279,7 +293,7 @@ elif interface == "Batch Prediction":
             )
     else:
-        st.info('Awaiting CSV file to be uploaded.')
 # Sidebar information
 st.sidebar.write("### Model Information")

 import numpy as np
 import os
 import pandas as pd
+import io
+import openpyxl
 # Load the preprocessor
 preprocessor_path = 'modelExports/preprocessor.pkl'
             return i
     return -1
 def process_uploaded_file(uploaded_file, required_columns):
     try:
+        file_extension = uploaded_file.name.split('.')[-1].lower()
+        if file_extension == 'csv':
+            # Read the first few rows to check for headers
+            df = pd.read_csv(uploaded_file, nrows=5)
+        elif file_extension in ['xlsx', 'xls']:
+            # Read the first few rows of the Excel file
+            df = pd.read_excel(uploaded_file, nrows=5, engine='openpyxl')
+        else:
+            st.error("Unsupported file format. Please upload a CSV or Excel file.")
+            return None
         header_row = find_header_row(df, required_columns)
         if header_row == -1:
             st.error(f"Required columns not found in the first {5} rows.")
             return None
         # Re-read the file with the correct header row
         uploaded_file.seek(0)  # Reset file pointer
+        if file_extension == 'csv':
+            df = pd.read_csv(uploaded_file, header=header_row)
+        else:
+            df = pd.read_excel(uploaded_file, header=header_row, engine='openpyxl')
+        st.write(f"Uploaded data has {df.shape[0]} rows and {df.shape[1]} columns.")
+        # Check for PID column
+        if 'PID' not in df.columns:
+            st.error("PID column not found in the uploaded file.")
+            return None
         return df
     except Exception as e:
+        st.error(f"Error reading the file: {e}")
         return None
 def predict_with_model(model, data, includes_preprocessor):
     if includes_preprocessor:
         return model.predict(data)
     else:
         return model.predict(preprocessor.transform(data))
 def create_model_output(df, predictions, model_name):
+    output_df = df[['PID']].copy()  # Only keep PID from original dataframe
     output_df['Prediction'] = predictions
     output_df['Churn Probability'] = predictions
+    output_df['Churn'] = output_df['Prediction'].apply(lambda x: 'Yes' if x == 1 else 'No')
     return output_df
 # Load models and record whether they include the preprocessor
 model_folder = 'modelExports'
 models = {}
 elif interface == "Batch Prediction":
     # Batch Prediction Interface
     st.header('Batch Prediction')
+    st.write('Upload a CSV or Excel file containing customer data.')
+    uploaded_file = st.file_uploader("Choose a CSV or Excel file", type=["csv", "xlsx", "xls"])
     if uploaded_file is not None:
         # Check if models are selected
         if not selected_models:
+            st.error("No models selected for prediction. Please select at least one model in the sidebar.")
             st.stop()
         required_columns = [
             includes_preprocessor = models_with_preprocessor[model_name]
             try:
+                predictions = predict_with_model(model, df[required_columns], includes_preprocessor)
+                model_outputs[model_name] = create_model_output(df, predictions, model_name)
                 st.success(f"Predictions completed for {model_name}")
             except Exception as e:
                 st.error(f"Error predicting with model {model_name}: {e}")
             )
     else:
+        st.info('Awaiting CSV or Excel file to be uploaded.')
 # Sidebar information
 st.sidebar.write("### Model Information")