# Streamlit app: Filter's Price Prediction (deployed as a Hugging Face Space).
| import streamlit as st | |
| import pandas as pd | |
| import pickle | |
| import os | |
| import numpy as np | |
| import joblib | |
| from sklearn.preprocessing import StandardScaler, LabelEncoder | |
| import xgboost as xgb | |
| from pathlib import Path | |
# Professional blue shades for dark & light mode — hex colors injected into
# the inline-styled HTML rendered via st.markdown(..., unsafe_allow_html=True).
HEADER_COLOR = "#0A84FF"      # Bright Blue — page title
SUBHEADER_COLOR = "#007AFF"   # iOS Blue — section headings
TEXT_COLOR = "#A6B1C0"        # Subtle grayish blue — body text
INFO_COLOR = "#5AC8FA"        # Light Cyan — info labels
PREDICTION_COLOR = "#34C759"  # Greenish-Blue — predicted-price highlight
# Read an uploaded file into a DataFrame, dispatching on its extension.
def read_file(uploaded_file):
    """Load an uploaded CSV, Excel, or JSON file into a pandas DataFrame.

    Shows a Streamlit error and returns None for unsupported extensions.
    """
    extension = uploaded_file.name.rsplit(".", 1)[-1].lower()
    readers = {
        "csv": pd.read_csv,
        "xls": pd.read_excel,
        "xlsx": pd.read_excel,
        "json": pd.read_json,
    }
    reader = readers.get(extension)
    if reader is None:
        st.error("❌ Unsupported file type! Please upload a CSV, Excel, or JSON file.")
        return None
    return reader(uploaded_file)
# Feature engineering helpers
def split_dimensions(dim):
    """Normalize a raw dimensions value to exactly five components.

    Returns [dimx, dimy, dimz, rim, pockets]. Non-list input yields five
    NaNs; short lists are NaN-padded, long lists are truncated to five.
    """
    values = dim if isinstance(dim, list) else []
    padded = values[:5] + [np.nan] * 5
    return padded[:5]
def split_qtd_price(qtd_price):
    """Return [quantity, price] from a two-element list, else [NaN, NaN]."""
    if isinstance(qtd_price, list) and len(qtd_price) == 2:
        return qtd_price
    return [np.nan, np.nan]
def prepare_advanced_features(df):
    """Prepare advanced features for prediction.

    Expands an optional raw 'Dimensions' list column into
    dimx/dimy/dimz/rim/pockets, then derives geometric features from the
    dimension columns (which must exist, either expanded here or already
    present as with the manual-input path).
    """
    out = df.copy()

    # Expand the raw 'Dimensions' list column when present.
    if 'Dimensions' in out.columns:
        expanded = out['Dimensions'].apply(split_dimensions).tolist()
        dims = pd.DataFrame(expanded, columns=['dimx', 'dimy', 'dimz', 'rim', 'pockets'])
        out = pd.concat([out, dims], axis=1)

    # Derived geometric features.
    out['Volume'] = out['dimx'] * out['dimy'] * out['dimz']
    out['SurfaceArea'] = out['dimx'] * out['dimy']
    out['Perimeter'] = 2 * (out['dimx'] + out['dimy'])
    out['AspectRatio'] = out['dimx'] / out['dimy']
    # NOTE(review): DensityIndex = Volume / (dimx*dimy*dimz) is identically
    # 1.0 (or NaN) — preserved as-is so the trained model's feature set and
    # scaling stay unchanged.
    out['DensityIndex'] = out['Volume'] / (out['dimx'] * out['dimy'] * out['dimz'])
    out['SizeComplexity'] = np.log1p(out['Volume']) * out['AspectRatio']
    return out
def process_input_data(df, selected_features):
    """Run feature engineering and return the frame restricted to the
    model's feature columns, in order.

    Any feature the engineering step did not produce is zero-filled so the
    output always matches `selected_features` exactly.
    """
    engineered = prepare_advanced_features(df)
    missing = [f for f in selected_features if f not in engineered.columns]
    for feature in missing:
        engineered[feature] = 0
    return engineered[selected_features]
# Load the trained model and transformers from disk.
def load_models():
    """Load the trained model, scaler, and label encoders from next to this file.

    Returns:
        tuple: (model, scaler, encoders, model_features) where model_features
        is the fixed feature order the model was trained on.
    """
    base_dir = Path(__file__).parent
    model = joblib.load(base_dir / 'model.pkl')
    # Force CPU histogram inference so the app runs on hosts without a GPU.
    model.set_params(tree_method='hist', device='cpu')
    scaler = joblib.load(base_dir / 'scaler.pkl')
    encoders = joblib.load(base_dir / 'encoders.pkl')

    # Feature order must match training. Hardcoded (rather than read from the
    # booster) so the engineered column names used by this app stay in sync.
    model_features = [
        'Volume', 'SurfaceArea', 'Perimeter',
        'SizeComplexity', 'MainCategoryEncoded',
        'SubCategoryEncoded', 'Quantity',
    ]
    return model, scaler, encoders, model_features
# Main App
def main():
    """Render the Streamlit UI: model info, manual-input form, and prediction."""
    # Load models once per session. Streamlit reruns this whole script on every
    # widget interaction, so unconditional loading would hit the disk each time.
    if 'model_features' not in st.session_state:
        model, scaler, encoders, model_features = load_models()
        st.session_state['model'] = model
        st.session_state['scaler'] = scaler
        st.session_state['encoders'] = encoders
        st.session_state['model_features'] = model_features
    model_features = st.session_state['model_features']

    st.markdown(f"<h1 style='color: {HEADER_COLOR}; text-align: center;'>🔹 Filter's Price Prediction App 🔹</h1>", unsafe_allow_html=True)
    st.markdown(f"<p style='color: {TEXT_COLOR}; font-size: 18px;'>This app uses a trained machine learning model to predict filter's prices based on input data.</p>", unsafe_allow_html=True)
    st.markdown(f"<p style='color: {TEXT_COLOR}; font-size: 18px;'>App version model not updated.</p>", unsafe_allow_html=True)

    # Model and Dataset Info
    st.markdown(f"<h2 style='color: {SUBHEADER_COLOR};'>📊 Model & Dataset Info</h2>", unsafe_allow_html=True)
    st.markdown(f"<p style='color: {INFO_COLOR};'>📌 Model:</p>", unsafe_allow_html=True)
    st.write("✅ **Type**: XGBRegressor")
    st.write("📈 **Features Used**:", model_features)
    st.write("💡 **Target**: Price")
    st.markdown(f"<p style='color: {INFO_COLOR};'>📚 Dataset:</p>", unsafe_allow_html=True)
    st.write("📋 **Dataset Name**: Filter's Price Dataset")
    st.write("📉 **Number of Rows**: 5,500")
    st.write("📊 **Number of Features**:", len(model_features))
    #st.write("🌐 **Source**: ")

    # Manual input section
    st.markdown(f"<h2 style='color: {SUBHEADER_COLOR};'>✍️ Manual Input</h2>", unsafe_allow_html=True)
    with st.form("manual_input_form"):
        col1, col2 = st.columns(2)
        with col1:
            dimx = st.number_input("Dimension X", min_value=0.0)
            dimy = st.number_input("Dimension Y", min_value=0.0)
            dimz = st.number_input("Dimension Z", min_value=0.0)
        with col2:
            quantity = st.number_input("Quantity", min_value=1)
        # Category input
        category = st.text_input("Main Category", help="Enter the main filter category (e.g., F7, MV/G4)")
        subcategory = st.text_input("Subcategory", help="Enter the filter subcategory (e.g., PL, G4)")
        submitted = st.form_submit_button("Calculate Price")

    if submitted:
        try:
            # Create dataframe from manual input
            manual_data = pd.DataFrame({
                'dimx': [dimx],
                'dimy': [dimy],
                'dimz': [dimz],
                'Quantity': [quantity],
                'MainCategory': [category],
                'SubCategory': [subcategory]
            })
            # NOTE(review): the loaded label encoders are never applied here, so
            # MainCategoryEncoded / SubCategoryEncoded end up zero-filled by
            # process_input_data. Confirm whether the text categories should be
            # encoded via st.session_state['encoders'] before prediction.
            manual_processed = process_input_data(manual_data, model_features)

            # Display input features and feature engineering
            st.markdown(f"<h3 style='color: {TEXT_COLOR};'>📝 Input Features and Feature Engineering:</h3>", unsafe_allow_html=True)
            st.dataframe(manual_processed)

            # Scale the data and make the prediction
            manual_scaled = st.session_state['scaler'].transform(manual_processed)
            prediction = st.session_state['model'].predict(manual_scaled)[0]
            st.markdown(
                f"<h3 style='color: {TEXT_COLOR}; display: inline;'>🔮 Predicted Price: "
                f"<span style='color: {PREDICTION_COLOR};'>${prediction:.2f}</span></h3>",
                unsafe_allow_html=True
            )
        except Exception as e:
            st.error(f"Error calculating price: {str(e)}")

    # Upload-for-prediction section (currently disabled; retained for future work).
    # st.markdown(f"<h2 style='color: {SUBHEADER_COLOR};'>📂 Upload Data for Prediction</h2>", unsafe_allow_html=True)
    # uploaded_file = st.file_uploader("📥 Upload a CSV, Excel, or JSON file", type=["csv", "xlsx", "xls", "json"])
    # if uploaded_file is not None:
    #     input_data = read_file(uploaded_file)
    #     if input_data is not None:
    #         st.markdown(f"<p style='color: {INFO_COLOR};'>📜 Uploaded Data:</p>", unsafe_allow_html=True)
    #         st.dataframe(input_data)
    #         if all(feature in input_data.columns for feature in model_features):
    #             processed_data = process_input_data(input_data, model_features)
    #             st.markdown(f"<h3 style='color: {TEXT_COLOR};'>📝 Processed Features and Feature Engineering:</h3>", unsafe_allow_html=True)
    #             st.dataframe(processed_data)
    #             scaled_data = st.session_state['scaler'].transform(processed_data)
    #             predictions = st.session_state['model'].predict(scaled_data)
    #             input_data["Predicted Price"] = predictions
    #             st.markdown(f"<h3 style='color: {PREDICTION_COLOR};'>🔮 Predictions:</h3>", unsafe_allow_html=True)
    #             st.dataframe(input_data)
    #         else:
    #             st.error(f"❌ Uploaded data must contain the required features: {model_features}")

# Run the app
if __name__ == "__main__":
    main()