Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -9,19 +9,42 @@ import openpyxl
|
|
| 9 |
preprocessor_path = 'modelExports/preprocessor.pkl'
|
| 10 |
preprocessor = joblib.load(preprocessor_path)
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
def process_uploaded_file(uploaded_file, required_columns):
|
| 14 |
try:
|
| 15 |
file_extension = uploaded_file.name.split('.')[-1].lower()
|
| 16 |
|
| 17 |
if file_extension == 'csv':
|
| 18 |
-
|
|
|
|
| 19 |
elif file_extension in ['xlsx', 'xls']:
|
| 20 |
-
df = pd.read_excel(uploaded_file, engine='openpyxl')
|
| 21 |
else:
|
| 22 |
st.error("Unsupported file format. Please upload a CSV or Excel file.")
|
| 23 |
return None
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
# Standardize column names to uppercase and strip spaces
|
| 26 |
df.columns = df.columns.str.upper().str.strip()
|
| 27 |
st.write("DataFrame columns:", df.columns.tolist())
|
|
@@ -42,14 +65,12 @@ def process_uploaded_file(uploaded_file, required_columns):
|
|
| 42 |
st.error(f"Error reading the file: {e}")
|
| 43 |
return None
|
| 44 |
|
| 45 |
-
|
| 46 |
def predict_with_model(model, data, includes_preprocessor):
    """Return ``model.predict`` output for ``data``.

    When ``includes_preprocessor`` is falsy, ``data`` is first passed through
    the module-level ``preprocessor``'s ``transform``; otherwise it is handed
    to the model unchanged.
    """
    features = data if includes_preprocessor else preprocessor.transform(data)
    return model.predict(features)
|
| 51 |
|
| 52 |
-
|
| 53 |
def create_model_output(df, predictions, model_name):
|
| 54 |
output_df = df[['PID']].copy() # Keep PID from original dataframe
|
| 55 |
output_df['Prediction'] = predictions
|
|
@@ -58,7 +79,6 @@ def create_model_output(df, predictions, model_name):
|
|
| 58 |
lambda x: 'Yes' if x == 1 else 'No')
|
| 59 |
return output_df
|
| 60 |
|
| 61 |
-
|
| 62 |
# Load models and record whether they include the preprocessor
|
| 63 |
model_folder = 'modelExports'
|
| 64 |
models = {}
|
|
@@ -248,6 +268,9 @@ elif interface == "Batch Prediction":
|
|
| 248 |
if df is None:
|
| 249 |
st.stop()
|
| 250 |
|
|
|
|
|
|
|
|
|
|
| 251 |
# Convert numerical columns to numeric data types
|
| 252 |
numerical_columns = [
|
| 253 |
'BILLING_ZIP', 'ACTIVE_SUBSCRIBERS', 'NOT_ACTIVE_SUBSCRIBERS',
|
|
@@ -284,7 +307,7 @@ elif interface == "Batch Prediction":
|
|
| 284 |
|
| 285 |
try:
|
| 286 |
# Prepare data for prediction
|
| 287 |
-
data_for_prediction = df[
|
| 288 |
|
| 289 |
if not includes_preprocessor:
|
| 290 |
data_for_prediction = preprocessor.transform(data_for_prediction)
|
|
@@ -311,7 +334,6 @@ elif interface == "Batch Prediction":
|
|
| 311 |
else:
|
| 312 |
st.info('Awaiting CSV or Excel file to be uploaded.')
|
| 313 |
|
| 314 |
-
|
| 315 |
# Sidebar information
|
| 316 |
st.sidebar.write("### Model Information")
|
| 317 |
st.sidebar.write(f"Total models available: {len(models)}")
|
|
|
|
| 9 |
preprocessor_path = 'modelExports/preprocessor.pkl'
|
| 10 |
preprocessor = joblib.load(preprocessor_path)
|
| 11 |
|
| 12 |
+
def find_header_row(df, required_columns, max_rows_to_check=10):
    """Locate the row of a header-less frame that holds the column names.

    Every inspected row is normalised the same way as the required names
    (stringified, upper-cased, surrounding whitespace stripped), and the
    first row that contains all required names is reported.

    Args:
        df: DataFrame whose rows are scanned; the caller reads it with
            ``header=None`` so that a header line appears as ordinary data.
        required_columns: Names that must all be present in the header row.
            Non-string names are stringified, exactly like the cell values.
        max_rows_to_check: Maximum number of leading rows to inspect.

    Returns:
        Zero-based position of the first matching row, or -1 when none of
        the inspected rows contains every required name.
    """
    # A set makes the "all required names present" test a single subset
    # comparison instead of one list scan per required column.
    required = {str(col).upper().strip() for col in required_columns}

    rows_to_scan = min(max_rows_to_check, len(df))
    for position in range(rows_to_scan):
        cells = {str(value).upper().strip() for value in df.iloc[position].values}
        if required <= cells:
            return position  # Header row found at this position

    return -1  # Header row not found
|
| 19 |
|
| 20 |
def process_uploaded_file(uploaded_file, required_columns):
|
| 21 |
try:
|
| 22 |
file_extension = uploaded_file.name.split('.')[-1].lower()
|
| 23 |
|
| 24 |
if file_extension == 'csv':
|
| 25 |
+
# Read the first few rows to check for headers
|
| 26 |
+
df = pd.read_csv(uploaded_file, nrows=10, header=None)
|
| 27 |
elif file_extension in ['xlsx', 'xls']:
|
| 28 |
+
df = pd.read_excel(uploaded_file, nrows=10, header=None, engine='openpyxl')
|
| 29 |
else:
|
| 30 |
st.error("Unsupported file format. Please upload a CSV or Excel file.")
|
| 31 |
return None
|
| 32 |
|
| 33 |
+
header_row = find_header_row(df, required_columns)
|
| 34 |
+
|
| 35 |
+
if header_row == -1:
|
| 36 |
+
st.error(f"Required columns not found in the first {len(df)} rows.")
|
| 37 |
+
st.write("Expected columns:", required_columns)
|
| 38 |
+
st.write("Found data rows:", df.head().values.tolist())
|
| 39 |
+
return None
|
| 40 |
+
|
| 41 |
+
# Re-read the file with the correct header row
|
| 42 |
+
uploaded_file.seek(0) # Reset file pointer
|
| 43 |
+
if file_extension == 'csv':
|
| 44 |
+
df = pd.read_csv(uploaded_file, header=header_row)
|
| 45 |
+
else:
|
| 46 |
+
df = pd.read_excel(uploaded_file, header=header_row, engine='openpyxl')
|
| 47 |
+
|
| 48 |
# Standardize column names to uppercase and strip spaces
|
| 49 |
df.columns = df.columns.str.upper().str.strip()
|
| 50 |
st.write("DataFrame columns:", df.columns.tolist())
|
|
|
|
| 65 |
st.error(f"Error reading the file: {e}")
|
| 66 |
return None
|
| 67 |
|
|
|
|
| 68 |
def predict_with_model(model, data, includes_preprocessor):
    """Return ``model.predict`` output for ``data``.

    When ``includes_preprocessor`` is falsy, ``data`` is first passed through
    the module-level ``preprocessor``'s ``transform``; otherwise it is handed
    to the model unchanged.
    """
    features = data if includes_preprocessor else preprocessor.transform(data)
    return model.predict(features)
|
| 73 |
|
|
|
|
| 74 |
def create_model_output(df, predictions, model_name):
|
| 75 |
output_df = df[['PID']].copy() # Keep PID from original dataframe
|
| 76 |
output_df['Prediction'] = predictions
|
|
|
|
| 79 |
lambda x: 'Yes' if x == 1 else 'No')
|
| 80 |
return output_df
|
| 81 |
|
|
|
|
| 82 |
# Load models and record whether they include the preprocessor
|
| 83 |
model_folder = 'modelExports'
|
| 84 |
models = {}
|
|
|
|
| 268 |
if df is None:
|
| 269 |
st.stop()
|
| 270 |
|
| 271 |
+
# Standardize required columns to uppercase and strip spaces
|
| 272 |
+
required_columns_upper = [col.upper().strip() for col in required_columns]
|
| 273 |
+
|
| 274 |
# Convert numerical columns to numeric data types
|
| 275 |
numerical_columns = [
|
| 276 |
'BILLING_ZIP', 'ACTIVE_SUBSCRIBERS', 'NOT_ACTIVE_SUBSCRIBERS',
|
|
|
|
| 307 |
|
| 308 |
try:
|
| 309 |
# Prepare data for prediction
|
| 310 |
+
data_for_prediction = df[required_columns_upper[1:]] # Exclude 'PID'
|
| 311 |
|
| 312 |
if not includes_preprocessor:
|
| 313 |
data_for_prediction = preprocessor.transform(data_for_prediction)
|
|
|
|
| 334 |
else:
|
| 335 |
st.info('Awaiting CSV or Excel file to be uploaded.')
|
| 336 |
|
|
|
|
| 337 |
# Sidebar information
|
| 338 |
st.sidebar.write("### Model Information")
|
| 339 |
st.sidebar.write(f"Total models available: {len(models)}")
|