import streamlit as st
import pandas as pd
import pickle
import os
import numpy as np
import joblib
from sklearn.preprocessing import StandardScaler, LabelEncoder
import xgboost as xgb
from pathlib import Path

# Professional Blue Shades for Dark & Light Mode
HEADER_COLOR = "#0A84FF"      # Bright Blue
SUBHEADER_COLOR = "#007AFF"   # iOS Blue
TEXT_COLOR = "#A6B1C0"        # Subtle grayish blue
INFO_COLOR = "#5AC8FA"        # Light Cyan
PREDICTION_COLOR = "#34C759"  # Greenish-Blue


def read_file(uploaded_file):
    """Read an uploaded CSV / Excel / JSON file into a DataFrame.

    Returns None (after showing a Streamlit error) for unsupported extensions.
    """
    file_type = uploaded_file.name.split(".")[-1].lower()
    if file_type == "csv":
        return pd.read_csv(uploaded_file)
    elif file_type in ["xls", "xlsx"]:
        return pd.read_excel(uploaded_file)
    elif file_type == "json":
        return pd.read_json(uploaded_file)
    else:
        st.error("❌ Unsupported file type! Please upload a CSV, Excel, or JSON file.")
        return None


def split_dimensions(dim):
    """Process dimensions into separate components.

    Coerces any non-list value to five NaNs, then pads/truncates so the
    result is always exactly 5 elements: [dimx, dimy, dimz, rim, pockets].
    """
    if not isinstance(dim, list):
        dim = [np.nan] * 5
    return (dim[:5] + [np.nan] * 5)[:5]  # Ensure exactly 5 elements


def split_qtd_price(qtd_price):
    """Split quantity and price values.

    Returns [NaN, NaN] unless given a 2-element list.
    """
    if not isinstance(qtd_price, list) or len(qtd_price) != 2:
        return [np.nan, np.nan]
    return qtd_price


def prepare_advanced_features(df):
    """Prepare advanced (derived geometric) features for prediction."""
    df_processed = df.copy()

    # Expand a raw 'Dimensions' list-column into named columns when present;
    # callers (e.g. the manual-input form) may instead supply dimx/dimy/dimz
    # directly, in which case this step is skipped.
    if 'Dimensions' in df_processed.columns:
        dimensions_split = df_processed['Dimensions'].apply(split_dimensions).tolist()
        dimensions_df = pd.DataFrame(
            dimensions_split,
            columns=['dimx', 'dimy', 'dimz', 'rim', 'pockets'],
        )
        df_processed = pd.concat([df_processed, dimensions_df], axis=1)

    # Calculate derived features (requires dimx/dimy/dimz columns, either
    # from the split above or supplied directly by the caller).
    df_processed['Volume'] = (
        df_processed['dimx'] * df_processed['dimy'] * df_processed['dimz']
    )
    df_processed['SurfaceArea'] = df_processed['dimx'] * df_processed['dimy']
    df_processed['Perimeter'] = 2 * (df_processed['dimx'] + df_processed['dimy'])
    df_processed['AspectRatio'] = df_processed['dimx'] / df_processed['dimy']
    # NOTE(review): DensityIndex divides Volume by the same dimx*dimy*dimz
    # product, so it is identically 1 (or NaN/inf for zero dims). Kept as-is
    # to stay consistent with the features the model was trained on — confirm
    # against the training pipeline before removing.
    df_processed['DensityIndex'] = df_processed['Volume'] / (
        df_processed['dimx'] * df_processed['dimy'] * df_processed['dimz']
    )
    df_processed['SizeComplexity'] = (
        np.log1p(df_processed['Volume']) * df_processed['AspectRatio']
    )
    return df_processed


def process_input_data(df, selected_features):
    """Process input data for prediction.

    Runs feature engineering, then returns a frame containing exactly
    ``selected_features`` (in order), zero-filling any that are missing.
    """
    # Apply feature engineering
    df_processed = prepare_advanced_features(df)

    # Ensure all required features are present.
    # NOTE(review): MainCategoryEncoded / SubCategoryEncoded are never
    # computed anywhere in this app (the loaded encoders are unused), so they
    # are always zero-filled here — category inputs currently do not affect
    # the prediction. Verify against the training pipeline.
    for feature in selected_features:
        if feature not in df_processed.columns:
            df_processed[feature] = 0

    return df_processed[selected_features]


# Load the trained model and transformers into session state
@st.cache_resource
def load_models():
    """Load the trained model, scaler, and label encoders from disk.

    Returns (model, scaler, encoders, model_features). Cached by Streamlit so
    the artifacts are deserialized only once per process.
    """
    model_path = Path(__file__).parent / 'model.pkl'
    scaler_path = Path(__file__).parent / 'scaler.pkl'
    encoders_path = Path(__file__).parent / 'encoders.pkl'

    model = joblib.load(model_path)
    # Force CPU inference so the app runs on hosts without a GPU.
    model.set_params(tree_method='hist', device='cpu')
    scaler = joblib.load(scaler_path)
    encoders = joblib.load(encoders_path)

    # Feature list the model was trained on (assumes an XGBRegressor-style
    # model; order must match the scaler/model training order — TODO confirm).
    model_features = [
        'Volume', 'SurfaceArea', 'Perimeter', 'SizeComplexity',
        'MainCategoryEncoded', 'SubCategoryEncoded', 'Quantity'
    ]

    return model, scaler, encoders, model_features


# Main App
def main():
    # Ensure models are loaded into session_state
    model, scaler, encoders, model_features = load_models()
    st.session_state['model'] = model
    st.session_state['scaler'] = scaler
    st.session_state['encoders'] = encoders
    st.session_state['model_features'] = model_features

    # NOTE(review): the original styled-HTML markup for these st.markdown
    # calls was lost; reconstructed from the color constants defined above —
    # confirm against the intended design.
    st.markdown(
        f"<h1 style='text-align:center; color:{HEADER_COLOR};'>"
        f"🔹 Filter's Price Prediction App 🔹</h1>",
        unsafe_allow_html=True,
    )
    st.markdown(
        f"<p style='color:{TEXT_COLOR};'>This app uses a trained machine "
        f"learning model to predict filter's prices based on input data.</p>",
        unsafe_allow_html=True,
    )
    st.markdown(
        f"<p style='color:{INFO_COLOR};'>App version model not updated.</p>",
        unsafe_allow_html=True,
    )

    # Model and Dataset Info
    st.markdown(
        f"<h2 style='color:{SUBHEADER_COLOR};'>📊 Model & Dataset Info</h2>",
        unsafe_allow_html=True,
    )
    st.markdown(
        f"<h3 style='color:{SUBHEADER_COLOR};'>📌 Model:</h3>",
        unsafe_allow_html=True,
    )
    st.write("✅ **Type**: XGBRegressor")
    st.write("📈 **Features Used**:", model_features)
    st.write("💡 **Target**: Price")
    st.markdown(
        f"<h3 style='color:{SUBHEADER_COLOR};'>📚 Dataset:</h3>",
        unsafe_allow_html=True,
    )
    st.write("📋 **Dataset Name**: Filter's Price Dataset")
    st.write("📉 **Number of Rows**: 5,500")
    st.write("📊 **Number of Features**:", len(model_features))
    # st.write("🌐 **Source**: ")

    # Manual input section
    st.markdown(
        f"<h2 style='color:{SUBHEADER_COLOR};'>✍️ Manual Input</h2>",
        unsafe_allow_html=True,
    )
    with st.form("manual_input_form"):
        col1, col2 = st.columns(2)
        with col1:
            dimx = st.number_input("Dimension X", min_value=0.0)
            dimy = st.number_input("Dimension Y", min_value=0.0)
            dimz = st.number_input("Dimension Z", min_value=0.0)
        with col2:
            quantity = st.number_input("Quantity", min_value=1)
        # Category input
        category = st.text_input(
            "Main Category",
            help="Enter the main filter category (e.g., F7, MV/G4)",
        )
        subcategory = st.text_input(
            "Subcategory",
            help="Enter the filter subcategory (e.g., PL, G4)",
        )
        submitted = st.form_submit_button("Calculate Price")

    if submitted:
        try:
            # Create dataframe from manual input
            manual_data = pd.DataFrame({
                'dimx': [dimx],
                'dimy': [dimy],
                'dimz': [dimz],
                'Quantity': [quantity],
                'MainCategory': [category],
                'SubCategory': [subcategory],
            })

            # Process manual input
            manual_processed = process_input_data(manual_data, model_features)

            # Display input features and feature engineering
            st.markdown(
                f"<h3 style='color:{SUBHEADER_COLOR};'>"
                f"📝 Input Features and Feature Engineering:</h3>",
                unsafe_allow_html=True,
            )
            st.dataframe(manual_processed)  # Display the processed features

            # Scale the data and make prediction
            manual_scaled = st.session_state['scaler'].transform(manual_processed)
            prediction = st.session_state['model'].predict(manual_scaled)[0]

            # Display prediction and its explanation
            st.markdown(
                f"<h3 style='color:{PREDICTION_COLOR};'>"
                f"🔮 Predicted Price: ${prediction:.2f}</h3>",
                unsafe_allow_html=True,
            )
        except Exception as e:
            st.error(f"Error calculating price: {str(e)}")

    # Upload CSV for Prediction — kept disabled. NOTE(review): before
    # re-enabling, fix the feature check below: it requires the *engineered*
    # model_features in the raw upload, but raw uploads carry pre-engineering
    # columns, so the check would reject valid files.
    # st.markdown(
    #     f"<h2 style='color:{SUBHEADER_COLOR};'>📂 Upload Data for Prediction</h2>",
    #     unsafe_allow_html=True,
    # )
    # uploaded_file = st.file_uploader(
    #     "📥 Upload a CSV, Excel, or JSON file",
    #     type=["csv", "xlsx", "xls", "json"],
    # )
    # if uploaded_file is not None:
    #     input_data = read_file(uploaded_file)
    #     if input_data is not None:
    #         st.markdown(
    #             f"<h3 style='color:{SUBHEADER_COLOR};'>📜 Uploaded Data:</h3>",
    #             unsafe_allow_html=True,
    #         )
    #         st.dataframe(input_data)  # Display uploaded data
    #         # Ensure the required columns exist in the input data
    #         if all(feature in input_data.columns for feature in model_features):
    #             # Process the input data
    #             processed_data = process_input_data(input_data, model_features)
    #             # Display processed features and engineering
    #             st.markdown(
    #                 f"<h3 style='color:{SUBHEADER_COLOR};'>"
    #                 f"📝 Processed Features and Feature Engineering:</h3>",
    #                 unsafe_allow_html=True,
    #             )
    #             st.dataframe(processed_data)  # Show feature engineering results
    #             # Apply scaling to processed data
    #             scaled_data = st.session_state['scaler'].transform(processed_data)
    #             # Make predictions for all rows
    #             predictions = st.session_state['model'].predict(scaled_data)
    #             # Add the predictions to the dataframe
    #             input_data["Predicted Price"] = predictions
    #             # Display the final table with input features, feature
    #             # engineering, and the predicted price
    #             st.markdown(
    #                 f"<h3 style='color:{PREDICTION_COLOR};'>🔮 Predictions:</h3>",
    #                 unsafe_allow_html=True,
    #             )
    #             st.dataframe(input_data)  # Display the final table
    #         else:
    #             st.error(
    #                 f"❌ Uploaded data must contain the required features: "
    #                 f"{model_features}"
    #             )


# Run the app
if __name__ == "__main__":
    main()