1nox committed on
Commit
8af866a
·
verified ·
1 Parent(s): f6137a6

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +223 -0
  2. encoders.pkl +3 -0
  3. model.pkl +3 -0
  4. scaler.pkl +3 -0
app.py ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import pickle
4
+ import os
5
+ import numpy as np
6
+ import joblib
7
+ from sklearn.preprocessing import StandardScaler, LabelEncoder
8
+ import xgboost as xgb
9
+ from pathlib import Path
10
+
11
+
12
# Colour palette (hex codes) interpolated into the inline HTML styles of the
# page; the shades are chosen to stay readable in both dark and light themes.
HEADER_COLOR = "#0A84FF"  # bright blue, page title
SUBHEADER_COLOR = "#007AFF"  # iOS blue, section headings
TEXT_COLOR = "#A6B1C0"  # subtle greyish blue, body text
INFO_COLOR = "#5AC8FA"  # light cyan, info labels
PREDICTION_COLOR = "#34C759"  # green, predicted price
18
+
19
+ # Read uploaded file
20
def read_file(uploaded_file):
    """Load an uploaded CSV, Excel or JSON file into a DataFrame.

    The format is picked from the text after the last "." in
    ``uploaded_file.name``, compared case-insensitively.

    Args:
        uploaded_file: File-like object exposing a ``name`` attribute.

    Returns:
        The parsed ``pandas.DataFrame``, or ``None`` (after showing an error
        through Streamlit) when the extension is not csv/xls/xlsx/json.
    """
    readers = {
        "csv": pd.read_csv,
        "xls": pd.read_excel,
        "xlsx": pd.read_excel,
        "json": pd.read_json,
    }
    extension = uploaded_file.name.rsplit(".", 1)[-1].lower()
    reader = readers.get(extension)
    if reader is None:
        st.error("❌ Unsupported file type! Please upload a CSV, Excel, or JSON file.")
        return None
    return reader(uploaded_file)
31
+
32
+ # Feature engineering functions
33
def split_dimensions(dim):
    """Normalise a dimensions value to a list of exactly five entries.

    A list is truncated to its first five items and right-padded with NaN
    when shorter; anything that is not a ``list`` yields five NaNs.
    """
    padding = [np.nan] * 5
    values = dim if isinstance(dim, list) else padding
    # Appending a full set of NaNs before slicing guarantees length five.
    return (values[:5] + padding)[:5]
38
+
39
def split_qtd_price(qtd_price):
    """Return a two-item quantity/price list, or two NaNs if it is malformed.

    The input is returned unchanged only when it is a ``list`` of exactly two
    items; every other value is replaced by ``[nan, nan]``.
    """
    is_pair = isinstance(qtd_price, list) and len(qtd_price) == 2
    return qtd_price if is_pair else [np.nan, np.nan]
44
+
45
def prepare_advanced_features(df):
    """Return a copy of ``df`` extended with geometric features.

    When a ``Dimensions`` column is present, each entry is expanded (via
    ``split_dimensions``) into the columns ``dimx``, ``dimy``, ``dimz``,
    ``rim`` and ``pockets``. The derived columns ``Volume``, ``SurfaceArea``,
    ``Perimeter``, ``AspectRatio``, ``DensityIndex`` and ``SizeComplexity``
    are then computed from ``dimx``/``dimy``/``dimz``.

    Args:
        df: Input frame holding either a ``Dimensions`` column or the
            ``dimx``, ``dimy`` and ``dimz`` columns directly.

    Returns:
        A new DataFrame; ``df`` itself is not modified.

    Raises:
        KeyError: If ``dimx``, ``dimy`` or ``dimz`` is unavailable.
    """
    df_processed = df.copy()

    # Process dimensions
    if 'Dimensions' in df_processed.columns:
        dimensions_split = df_processed['Dimensions'].apply(split_dimensions).tolist()
        # Build the expanded frame on the input's own index. With the default
        # RangeIndex, concat(axis=1) aligns on labels and would misplace the
        # values (adding all-NaN rows) whenever df is filtered or re-indexed.
        dimensions_df = pd.DataFrame(
            dimensions_split,
            columns=['dimx', 'dimy', 'dimz', 'rim', 'pockets'],
            index=df_processed.index,
        )
        df_processed = pd.concat([df_processed, dimensions_df], axis=1)

    # Calculate derived features
    df_processed['Volume'] = df_processed['dimx'] * df_processed['dimy'] * df_processed['dimz']
    df_processed['SurfaceArea'] = df_processed['dimx'] * df_processed['dimy']
    df_processed['Perimeter'] = 2 * (df_processed['dimx'] + df_processed['dimy'])
    # A zero dimy gives inf (or NaN for 0/0) here, following pandas division.
    df_processed['AspectRatio'] = df_processed['dimx'] / df_processed['dimy']
    # NOTE(review): this divides Volume by the same product it was built from,
    # so it is 1.0 wherever the volume is non-zero and NaN where it is zero.
    # Kept unchanged; confirm the intended formula with the training code.
    df_processed['DensityIndex'] = df_processed['Volume'] / (df_processed['dimx'] * df_processed['dimy'] * df_processed['dimz'])
    df_processed['SizeComplexity'] = np.log1p(df_processed['Volume']) * df_processed['AspectRatio']

    return df_processed
64
+
65
def process_input_data(df, selected_features):
    """Build the model input frame restricted to ``selected_features``.

    The input is first run through ``prepare_advanced_features``. Any
    requested feature that is still missing afterwards is added as a column
    of zeros, so the result always contains every requested column, in the
    requested order.

    NOTE(review): the zero fill is silent, so a feature that was never
    computed reaches the model as a constant 0 - confirm this is intended.
    """
    engineered = prepare_advanced_features(df)

    absent = [name for name in selected_features if name not in engineered.columns]
    for name in absent:
        engineered[name] = 0

    return engineered[selected_features]
76
+
77
+
78
+ # Load the trained model and transformers into session state
79
@st.cache_resource
def load_models():
    """Load the model, scaler and encoders stored next to this file.

    The three artifacts (``model.pkl``, ``scaler.pkl``, ``encoders.pkl``) are
    read with ``joblib.load``, the model is switched to the CPU ``hist`` tree
    method, and everything - together with the hard-coded feature list - is
    written to ``st.session_state`` and returned.

    Returns:
        Tuple ``(model, scaler, encoders, model_features)``.
    """
    base_dir = Path(__file__).parent

    # joblib.load unpickles its input; only bundled, trusted files are read.
    model = joblib.load(base_dir / 'model.pkl')
    model.set_params(tree_method='hist', device='cpu')
    scaler = joblib.load(base_dir / 'scaler.pkl')
    encoders = joblib.load(base_dir / 'encoders.pkl')

    # The booster is fetched but its result is not used; the feature names
    # below are hard-coded rather than read from the model.
    _booster = model.get_booster()
    model_features = [
        'Volume', 'SurfaceArea', 'Perimeter',
        'SizeComplexity', 'MainCategoryEncoded',
        'SubCategoryEncoded', 'Quantity',
    ]

    # Publish the loaded objects to the current session.
    loaded = (
        ('model', model),
        ('scaler', scaler),
        ('encoders', encoders),
        ('model_features', model_features),
    )
    for key, value in loaded:
        st.session_state[key] = value

    return model, scaler, encoders, model_features
106
+
107
+
108
+ # Main App
109
def main():
    """Render the page: model/dataset summary plus a manual price-prediction form.

    On form submission a one-row DataFrame is built from the inputs, passed
    through ``process_input_data``, scaled with the loaded scaler and fed to
    the loaded model; the predicted price (or any error raised on the way) is
    displayed on the page.
    """
    # load_models() is wrapped in st.cache_resource, so on a cache hit its body
    # - including its st.session_state writes - is not executed. Session state
    # is per browser session, so a later session would not find the keys.
    # Use the returned objects and populate the session here instead.
    model, scaler, encoders, model_features = load_models()
    st.session_state['model'] = model
    st.session_state['scaler'] = scaler
    st.session_state['encoders'] = encoders
    st.session_state['model_features'] = model_features

    st.markdown(f"<h1 style='color: {HEADER_COLOR}; text-align: center;'>🔹 Filter's Price Prediction App 🔹</h1>", unsafe_allow_html=True)
    st.markdown(f"<p style='color: {TEXT_COLOR}; font-size: 18px;'>This app uses a trained machine learning model to predict filter's prices based on input data.</p>", unsafe_allow_html=True)
    st.markdown(f"<p style='color: {TEXT_COLOR}; font-size: 18px;'>App version model not updated.</p>", unsafe_allow_html=True)

    # Model and Dataset Info
    st.markdown(f"<h2 style='color: {SUBHEADER_COLOR};'>📊 Model & Dataset Info</h2>", unsafe_allow_html=True)

    st.markdown(f"<p style='color: {INFO_COLOR};'>📌 Model:</p>", unsafe_allow_html=True)
    st.write("✅ **Type**: XGBRegressor")
    st.write("📈 **Features Used**:", model_features)
    st.write("💡 **Target**: Price")

    st.markdown(f"<p style='color: {INFO_COLOR};'>📚 Dataset:</p>", unsafe_allow_html=True)
    st.write("📋 **Dataset Name**: Filter's Price Dataset")
    st.write("📉 **Number of Rows**: 5,500")
    st.write("📊 **Number of Features**:", len(model_features))
    #st.write("🌐 **Source**: ")

    # Manual input section
    st.markdown(f"<h2 style='color: {SUBHEADER_COLOR};'>✍️ Manual Input</h2>", unsafe_allow_html=True)
    with st.form("manual_input_form"):
        col1, col2 = st.columns(2)

        with col1:
            dimx = st.number_input("Dimension X", min_value=0.0)
            dimy = st.number_input("Dimension Y", min_value=0.0)
            dimz = st.number_input("Dimension Z", min_value=0.0)

        with col2:
            quantity = st.number_input("Quantity", min_value=1)

        # Category input
        category = st.text_input("Main Category", help="Enter the main filter category (e.g., F7, MV/G4)")
        subcategory = st.text_input("Subcategory", help="Enter the filter subcategory (e.g., PL, G4)")

        submitted = st.form_submit_button("Calculate Price")

    if submitted:
        try:
            # Create dataframe from manual input
            # NOTE(review): the model features include 'MainCategoryEncoded'
            # and 'SubCategoryEncoded', but nothing here applies the loaded
            # encoders to 'MainCategory'/'SubCategory'. process_input_data
            # therefore fills both encoded columns with 0 and the category
            # inputs do not influence the prediction - confirm and wire in
            # the encoders once their structure is known.
            manual_data = pd.DataFrame({
                'dimx': [dimx],
                'dimy': [dimy],
                'dimz': [dimz],
                'Quantity': [quantity],
                'MainCategory': [category],
                'SubCategory': [subcategory],
            })

            # Process manual input
            manual_processed = process_input_data(manual_data, model_features)

            # Display input features and feature engineering
            st.markdown(f"<h3 style='color: {TEXT_COLOR};'>📝 Input Features and Feature Engineering:</h3>", unsafe_allow_html=True)
            st.dataframe(manual_processed)  # Display the processed features

            # Scale the data and make prediction
            manual_scaled = scaler.transform(manual_processed)
            prediction = model.predict(manual_scaled)[0]

            # Display prediction and its explanation
            st.markdown(
                f"<h3 style='color: {TEXT_COLOR}; display: inline;'>🔮 Predicted Price: "
                f"<span style='color: {PREDICTION_COLOR};'>${prediction:.2f}</span></h3>",
                unsafe_allow_html=True
            )

        except Exception as e:
            # UI boundary: surface any failure to the user instead of crashing.
            st.error(f"Error calculating price: {str(e)}")
187
+
188
+ # Upload CSV for Prediction
189
+ # st.markdown(f"<h2 style='color: {SUBHEADER_COLOR};'>📂 Upload Data for Prediction</h2>", unsafe_allow_html=True)
190
+ # uploaded_file = st.file_uploader("📥 Upload a CSV, Excel, or JSON file", type=["csv", "xlsx", "xls", "json"])
191
+
192
+ # if uploaded_file is not None:
193
+ # input_data = read_file(uploaded_file)
194
+ # if input_data is not None:
195
+ # st.markdown(f"<p style='color: {INFO_COLOR};'>📜 Uploaded Data:</p>", unsafe_allow_html=True)
196
+ # st.dataframe(input_data) # Display uploaded data
197
+
198
+ # # Ensure the required columns exist in the input data
199
+ # if all(feature in input_data.columns for feature in model_features):
200
+ # # Process the input data
201
+ # processed_data = process_input_data(input_data, model_features)
202
+
203
+ # # Display processed features and engineering
204
+ # st.markdown(f"<h3 style='color: {TEXT_COLOR};'>📝 Processed Features and Feature Engineering:</h3>", unsafe_allow_html=True)
205
+ # st.dataframe(processed_data) # Show feature engineering results
206
+
207
+ # # Apply scaling to processed data
208
+ # scaled_data = st.session_state['scaler'].transform(processed_data)
209
+
210
+ # # Make predictions for all rows
211
+ # predictions = st.session_state['model'].predict(scaled_data)
212
+
213
+ # # Add the predictions to the dataframe
214
+ # input_data["Predicted Price"] = predictions
215
+
216
+ # # Display the final table with input features, feature engineering, and the predicted price
217
+ # st.markdown(f"<h3 style='color: {PREDICTION_COLOR};'>🔮 Predictions:</h3>", unsafe_allow_html=True)
218
+ # st.dataframe(input_data) # Display the final table
219
+ # else:
220
+ # st.error(f"❌ Uploaded data must contain the required features: {model_features}")
221
+
222
# Run the app only when this file is executed as a script, so that importing
# the module (e.g. to reuse the feature helpers) does not render the page.
if __name__ == "__main__":
    main()
encoders.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79bea9680f6f1e2d10e644d4fb660f5596ff49e5e3caac2132f5e733c68f88d1
3
+ size 2138
model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6c02ab571ac8bf936e0a72e28110a0e3369e39866d338a79f3a75bff8fdfe38
3
+ size 472924
scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5abbfa31ed73b5f2049c2836f9b53f4524eae28777d2071086c1d28a5da60d9
3
+ size 1183