prasanthr0416 committed on
Commit
040f2f4
·
verified ·
1 Parent(s): 0468b42

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +139 -227
app.py CHANGED
@@ -3,17 +3,12 @@ import pandas as pd
3
  import numpy as np
4
  import os
5
  import pickle
6
- import joblib
7
 
8
- st.set_page_config(
9
- page_title="Household Power Consumption Prediction",
10
- page_icon="⚡",
11
- layout="wide"
12
- )
13
 
14
  # Hugging Face compatible paths
15
  RAW_FEATURES_CSV = "raw_features.csv"
16
- MODEL_PKL = "trained_models/decision_tree_model.pkl" # Changed to your decision tree model
17
  SCALER_PKL = "trained_models/scaler.pkl"
18
 
19
  FEATURES = [
@@ -31,267 +26,184 @@ NUMERIC_COLS_TO_SCALE = [
31
 
32
  SUBMETER_COLS = ['Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3']
33
 
34
- # Enhanced loading with better error handling
35
  @st.cache_resource
36
  def load_csv(path):
37
- """Load CSV file with multiple fallback options"""
38
- if os.path.exists(path):
39
- return pd.read_csv(path)
40
-
41
- # Try alternative paths
42
- alternative_paths = [
43
- "./raw_features.csv",
44
- "data/raw_features.csv",
45
- "../raw_features.csv"
46
- ]
47
-
48
- for alt_path in alternative_paths:
49
- if os.path.exists(alt_path):
50
- return pd.read_csv(alt_path)
51
-
52
- st.warning(f"CSV file not found. Using default values.")
53
- return None
54
 
55
  @st.cache_resource
56
- def load_model_pickle(path):
57
- """Load pickle file with error handling"""
58
- try:
59
- if os.path.exists(path):
60
- with open(path, "rb") as f:
61
- return pickle.load(f)
62
-
63
- # Try with joblib (more reliable)
64
- if os.path.exists(path):
65
- return joblib.load(path)
66
-
67
- except Exception as e:
68
- st.error(f"Error loading model: {e}")
69
-
70
- return None
71
 
72
- @st.cache_resource
73
- def load_scaler_pickle(path):
74
- """Load scaler pickle file"""
75
- try:
76
- if os.path.exists(path):
77
- # Try standard pickle first
78
- with open(path, "rb") as f:
79
- return pickle.load(f)
80
-
81
- # Try joblib
82
- if os.path.exists(path):
83
- return joblib.load(path)
84
-
85
- except Exception as e:
86
- st.error(f"Error loading scaler: {e}")
87
-
88
- return None
89
-
90
- # Load data and models
91
  raw_df = load_csv(RAW_FEATURES_CSV)
92
- model = load_model_pickle(MODEL_PKL)
93
- scaler = load_scaler_pickle(SCALER_PKL)
 
 
 
 
94
 
95
- # Initialize session state for suggestions
 
 
 
 
 
96
  if 'suggestion_pools' not in st.session_state:
97
  st.session_state.suggestion_pools = {}
98
 
99
- # Build suggestion pools
100
  def build_pool_for_feature(feat):
101
- """Create suggestion values for each feature"""
102
- if raw_df is not None and feat in raw_df.columns:
103
  vals = raw_df[feat].dropna().unique().tolist()
104
- if len(vals) > 0:
105
- # Take first 5 unique values for suggestions
106
- return vals[:5]
107
-
108
- # Default values if CSV not loaded
109
- if feat == 'Hour':
110
- return list(range(0, 24))
111
- elif feat in SUBMETER_COLS:
112
- return [0.0, 1.0, 2.0, 5.0, 10.0]
113
- elif 'Voltage' in feat:
114
- return [230.0, 235.0, 240.0, 245.0, 250.0]
115
  else:
116
- return [0.0, 0.5, 1.0, 1.5, 2.0]
 
 
 
 
 
117
 
118
- # Initialize suggestion pools
119
  for feat in FEATURES:
120
  st.session_state.suggestion_pools[feat] = build_pool_for_feature(feat)
121
 
122
  # Pre-fill sample input
123
- def generate_random_values():
124
- """Generate random values for all features"""
125
  for feat, pool in st.session_state.suggestion_pools.items():
126
- if len(pool) > 0:
127
  val = np.random.choice(pool)
128
- else:
129
  val = 0 if feat == 'Hour' else 0.0
130
-
131
- # Store in session state
132
  if feat == 'Hour':
133
- st.session_state[f"input_{feat}"] = int(float(val))
 
134
  else:
135
- st.session_state[f"input_{feat}"] = float(val)
136
-
137
- # Initialize random values if not exists
138
- if 'initialized' not in st.session_state:
139
- generate_random_values()
140
- st.session_state.initialized = True
141
-
142
- # UI Layout
143
- st.title("⚡ Household Power Consumption Prediction")
144
- st.markdown("Predict Global Active Power using Decision Tree Model")
145
-
146
- # Sidebar for info
147
- with st.sidebar:
148
- st.header("ℹ️ Information")
149
- st.markdown("""
150
- **Features Used:**
151
- - Global Reactive Power
152
- - Voltage
153
- - Sub-metering 1, 2, 3
154
- - Daily averages
155
- - Time features (Hour, Peak hours, Daytime)
156
- """)
157
-
158
- if model is not None:
159
- st.success("✅ Decision Tree Model Loaded")
160
- else:
161
- st.error("❌ Model not loaded")
162
-
163
- if scaler is not None:
164
- st.success("✅ Scaler Loaded")
165
- else:
166
- st.error("❌ Scaler not loaded")
167
 
168
- # Generate Random Values Button
169
- col1, col2 = st.columns([1, 3])
170
- with col1:
171
- if st.button("🎲 Generate Random Values", use_container_width=True):
172
- generate_random_values()
173
- st.rerun()
174
 
175
- # Input fields in columns
176
- st.header("📝 Input Features")
177
 
178
- cols = st.columns(2)
179
- input_values = {}
 
180
 
181
- for i, feat in enumerate(FEATURES):
 
 
 
182
  if feat in ['Is_peak_hour', 'Is_daytime']:
183
  continue
184
-
185
- col = cols[i % 2]
186
-
187
  if feat == 'Hour':
188
- default_val = st.session_state.get(f"input_{feat}", 12)
189
- val = col.number_input(
190
- "Hour (0-23)",
191
- min_value=0,
192
- max_value=23,
193
- value=int(default_val),
194
- step=1,
195
- key=f"num_{feat}"
196
- )
197
- input_values[feat] = val
198
  else:
199
- # Show suggestion from pool
200
- suggestions = st.session_state.suggestion_pools.get(feat, [])
201
- suggestion_text = ""
202
- if suggestions:
203
- suggestion_text = f"Suggestions: {', '.join([f'{s:.2f}' for s in suggestions[:3]])}"
204
-
205
- default_val = st.session_state.get(f"input_{feat}", 0.0)
206
- val = col.number_input(
207
- f"{feat}",
208
- value=float(default_val),
209
- format="%.4f",
210
- key=f"num_{feat}",
211
- help=suggestion_text
212
- )
213
- input_values[feat] = val
214
-
215
- # Auto-calculate flags
216
- hour_val = input_values.get('Hour', 12)
217
- input_values['Is_daytime'] = 1 if (6 <= hour_val < 18) else 0
218
- input_values['Is_peak_hour'] = 1 if (17 <= hour_val <= 20) else 0
219
-
220
- # Display input preview
221
- st.markdown("### 📊 Input Preview")
222
- preview_df = pd.DataFrame([input_values])
223
- # Reorder columns to match FEATURES order
224
- preview_df = preview_df[FEATURES]
225
- st.dataframe(preview_df.style.format("{:.4f}"), use_container_width=True)
 
 
226
 
227
  st.markdown("---")
 
228
 
229
- # Prediction section
230
- st.header("🔮 Prediction")
231
-
232
- predict_col1, predict_col2 = st.columns([1, 3])
233
- with predict_col1:
234
- predict_btn = st.button("🚀 Predict Global Active Power", type="primary", use_container_width=True)
235
 
 
236
  if predict_btn:
237
- # Validate inputs
238
- missing = [feat for feat in FEATURES
239
- if feat not in input_values or input_values[feat] is None]
240
-
241
- if missing:
242
- st.error(f"❌ Missing values for: {', '.join(missing)}")
243
  st.stop()
244
-
245
- # Check model and scaler
 
246
  if model is None:
247
- st.error("Model not loaded. Please check model file.")
248
  st.stop()
249
-
250
  if scaler is None:
251
- st.error("Scaler not loaded. Please check scaler file.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
  st.stop()
253
-
 
 
 
 
254
  try:
255
- # Create input DataFrame
256
- input_df = pd.DataFrame([input_values])
257
-
258
- # Apply log1p to submeter columns
259
- for col in SUBMETER_COLS:
260
- if col in input_df.columns:
261
- input_df[col] = np.log1p(input_df[col])
262
-
263
- # Scale numeric features
264
- if NUMERIC_COLS_TO_SCALE:
265
- scaled_values = scaler.transform(input_df[NUMERIC_COLS_TO_SCALE])
266
- input_df[NUMERIC_COLS_TO_SCALE] = scaled_values
267
-
268
- # Prepare final feature set
269
- X_input = input_df[FEATURES].values
270
-
271
- # Make prediction
272
- prediction = model.predict(X_input)[0]
273
-
274
- # Display result
275
- st.success(f"### Predicted Global Active Power: **{prediction:.6f}** kW")
276
-
277
- # Additional info
278
- with st.expander("📈 Prediction Details"):
279
- st.markdown(f"""
280
- **Model Used:** Decision Tree Regressor
281
- **Input Features:** {len(FEATURES)} features
282
- **Hour:** {hour_val}:00
283
- **Is Daytime:** {'Yes' if input_values['Is_daytime'] else 'No'}
284
- **Is Peak Hour:** {'Yes' if input_values['Is_peak_hour'] else 'No'}
285
- """)
286
-
287
  except Exception as e:
288
- st.error(f" Prediction failed: {str(e)}")
289
- st.info("Please check that all input values are valid numbers.")
290
 
291
- # Footer
292
- st.markdown("---")
293
- st.markdown("""
294
- <div style='text-align: center'>
295
- <p>Built with ❤️ using Streamlit | Model: Decision Tree Regressor</p>
296
- </div>
297
- """, unsafe_allow_html=True)
 
3
  import numpy as np
4
  import os
5
  import pickle
 
6
 
7
+ st.set_page_config(page_title="Household Power Consumption Prediction", layout="wide")
 
 
 
 
8
 
9
  # Hugging Face compatible paths
10
  RAW_FEATURES_CSV = "raw_features.csv"
11
+ MODEL_PKL = "trained_models/decision_tree_model.pkl" # Your uploaded model
12
  SCALER_PKL = "trained_models/scaler.pkl"
13
 
14
  FEATURES = [
 
26
 
27
  SUBMETER_COLS = ['Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3']
28
 
29
+ # Load model and csv
30
  @st.cache_resource
31
  def load_csv(path):
32
+ if not os.path.exists(path):
33
+ return None
34
+ return pd.read_csv(path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  @st.cache_resource
37
+ def load_pickle(path):
38
+ if not os.path.exists(path):
39
+ return None
40
+ with open(path, "rb") as f:
41
+ return pickle.load(f)
 
 
 
 
 
 
 
 
 
 
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  raw_df = load_csv(RAW_FEATURES_CSV)
44
+ scaler = load_pickle(SCALER_PKL)
45
+ model = load_pickle(MODEL_PKL)
46
+
47
+ if raw_df is None:
48
+ st.error(f"raw_features.csv not found at: {RAW_FEATURES_CSV}")
49
+ st.stop()
50
 
51
+ if model is None:
52
+ st.warning("Model not found or failed to load. Prediction will be disabled.")
53
+ if scaler is None:
54
+ st.warning("Scaler not found or failed to load. Prediction will be disabled.")
55
+
56
+ # Session defaults & pools
57
  if 'suggestion_pools' not in st.session_state:
58
  st.session_state.suggestion_pools = {}
59
 
60
+ # Build suggestion
61
  def build_pool_for_feature(feat):
62
+ if feat in raw_df.columns:
 
63
  vals = raw_df[feat].dropna().unique().tolist()
64
+ if len(vals) == 0:
65
+ return [0.0]
66
+ return vals
 
 
 
 
 
 
 
 
67
  else:
68
+ if feat == 'Hour':
69
+ return list(range(0, 24))
70
+ elif feat in SUBMETER_COLS:
71
+ return [0.0, 1.0, 2.0, 5.0, 10.0]
72
+ else:
73
+ return [0.0, 1.0, 2.0, 3.0, 4.0]
74
 
 
75
  for feat in FEATURES:
76
  st.session_state.suggestion_pools[feat] = build_pool_for_feature(feat)
77
 
78
  # Pre-fill sample input
79
+ def generate_custom_prefill():
 
80
  for feat, pool in st.session_state.suggestion_pools.items():
81
+ try:
82
  val = np.random.choice(pool)
83
+ except Exception:
84
  val = 0 if feat == 'Hour' else 0.0
 
 
85
  if feat == 'Hour':
86
+ st.session_state[f"cust_{feat}"] = int(float(val))
87
+ st.session_state[f"cust_txt_{feat}"] = str(int(float(val)))
88
  else:
89
+ st.session_state[f"cust_txt_{feat}"] = f"{float(val):.6f}"
90
+ st.session_state[f"cust_{feat}"] = float(val)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
 
 
 
 
 
 
92
 
93
+ # UI
94
+ st.title("Household Power Consumption Prediction")
95
 
96
+ if st.button("Generate Random values"):
97
+ generate_custom_prefill()
98
+ st.rerun()
99
 
100
+ cols = st.columns(2)
101
+ editable_values = {}
102
+ i = 0
103
+ for feat in FEATURES:
104
  if feat in ['Is_peak_hour', 'Is_daytime']:
105
  continue
106
+ colw = cols[i % 2]
107
+ i += 1
 
108
  if feat == 'Hour':
109
+ default_val = st.session_state.get(f"cust_{feat}", 9)
110
+ val = colw.number_input("Hour (0-23)", min_value=0, max_value=23, value=int(default_val), step=1, format="%d", key=f"cust_{feat}")
111
+ editable_values['Hour'] = int(val)
 
 
 
 
 
 
 
112
  else:
113
+ suggested = st.session_state.suggestion_pools.get(feat, [])
114
+ placeholder = ""
115
+ if len(suggested) > 0:
116
+ try:
117
+ placeholder = f" (e.g. {float(suggested[0]):.3f})"
118
+ except Exception:
119
+ placeholder = f" (e.g. {suggested[0]})"
120
+ default_txt = st.session_state.get(f"cust_txt_{feat}", "")
121
+ txt = colw.text_input(f"{feat}{placeholder}", value=default_txt, key=f"cust_txt_{feat}")
122
+ if txt.strip() == "":
123
+ editable_values[feat] = None
124
+ else:
125
+ try:
126
+ editable_values[feat] = float(txt)
127
+ except Exception:
128
+ colw.error("Invalid numeric value")
129
+ editable_values[feat] = None
130
+
131
+ # auto flags
132
+ h = int(editable_values.get('Hour', 0) if editable_values.get('Hour', 0) is not None else 0)
133
+ editable_values['Is_daytime'] = 1 if (6 <= h < 18) else 0
134
+ editable_values['Is_peak_hour'] = 1 if (17 <= h <= 20) else 0
135
+
136
+ # Show all input columns in the preview
137
+ st.markdown("### Custom input preview (all features + flags)")
138
+ preview = {k: v for k, v in editable_values.items()}
139
+ preview_df = pd.DataFrame([preview])
140
+ cols_to_show = [c for c in FEATURES if c in preview_df.columns]
141
+ st.dataframe(preview_df[cols_to_show], use_container_width=True)
142
 
143
  st.markdown("---")
144
+ predict_btn = st.button("Predict Global Active Power")
145
 
 
 
 
 
 
 
146
 
147
+ # Prediction logic
148
  if predict_btn:
149
+ # validate custom inputs
150
+ missing = [feat for feat in FEATURES if feat not in editable_values or (editable_values[feat] is None and feat not in ['Is_peak_hour','Is_daytime'])]
151
+ if len(missing) > 0:
152
+ st.error(f"Please fill values for: {missing}")
 
 
153
  st.stop()
154
+ row = editable_values.copy()
155
+
156
+ # ensure model & scaler present
157
  if model is None:
158
+ st.error("Model not loaded. Fix MODEL_PKL path.")
159
  st.stop()
 
160
  if scaler is None:
161
+ st.error("Scaler not loaded. Fix SCALER_PKL path.")
162
+ st.stop()
163
+
164
+ # Build DataFrame row and ensure all FEATURES present
165
+ row_df = pd.DataFrame([row], index=["user"])
166
+ for c in FEATURES:
167
+ if c not in row_df.columns:
168
+ if c == 'Is_daytime':
169
+ h = int(row_df['Hour'].iloc[0])
170
+ row_df[c] = 1 if (6 <= h < 18) else 0
171
+ elif c == 'Is_peak_hour':
172
+ h = int(row_df['Hour'].iloc[0])
173
+ row_df[c] = 1 if (17 <= h <= 20) else 0
174
+ else:
175
+ row_df[c] = 0.0
176
+
177
+ # Ensure numeric conversion
178
+ try:
179
+ row_df = row_df.astype(float)
180
+ except Exception:
181
+ st.error("Some inputs could not be converted to float — check your values.")
182
+ st.stop()
183
+
184
+ # Save raw copy (hide flags in preview)
185
+ raw_to_show = row_df[FEATURES].copy()
186
+
187
+ # Apply log1p to submeter columns
188
+ log_df = raw_to_show.copy()
189
+ for c in SUBMETER_COLS:
190
+ log_df[c] = np.log1p(log_df[c].astype(float))
191
+
192
+ # Scale numeric columns
193
+ try:
194
+ scaled_vals = scaler.transform(log_df[NUMERIC_COLS_TO_SCALE].values)
195
+ except Exception as e:
196
+ st.error(f"Scaler.transform failed: {e}")
197
  st.stop()
198
+
199
+ scaled_df = log_df.copy()
200
+ scaled_df.loc[:, NUMERIC_COLS_TO_SCALE] = scaled_vals
201
+
202
+ X_for_model = scaled_df[FEATURES].values
203
  try:
204
+ pred = model.predict(X_for_model)[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
  except Exception as e:
206
+ st.error(f"Model prediction failed: {e}")
207
+ st.stop()
208
 
209
+ st.success(f"Predicted Global_active_power: **{pred:.6f}** (model units)")