Spaces:

prasanthr0416
/

Household_Power_Usage_Forecast

Sleeping

App Files Files Community

prasanthr0416 committed on Jan 6

Commit

040f2f4

verified ·

1 Parent(s): 0468b42

Update app.py

Browse files

Files changed (1) hide show

app.py +139 -227

app.py CHANGED Viewed

@@ -3,17 +3,12 @@ import pandas as pd
 import numpy as np
 import os
 import pickle
-import joblib
-st.set_page_config(
-    page_title="Household Power Consumption Prediction",
-    page_icon="⚡",
-    layout="wide"
-)
 # Hugging Face compatible paths
 RAW_FEATURES_CSV = "raw_features.csv"
-MODEL_PKL = "trained_models/decision_tree_model.pkl"  # Changed to your decision tree model
 SCALER_PKL = "trained_models/scaler.pkl"
 FEATURES = [
@@ -31,267 +26,184 @@ NUMERIC_COLS_TO_SCALE = [
 SUBMETER_COLS = ['Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3']
-# Enhanced loading with better error handling
 @st.cache_resource
 def load_csv(path):
-    """Load CSV file with multiple fallback options"""
-    if os.path.exists(path):
-        return pd.read_csv(path)
-    # Try alternative paths
-    alternative_paths = [
-        "./raw_features.csv",
-        "data/raw_features.csv",
-        "../raw_features.csv"
-    ]
-    for alt_path in alternative_paths:
-        if os.path.exists(alt_path):
-            return pd.read_csv(alt_path)
-    st.warning(f"CSV file not found. Using default values.")
-    return None
 @st.cache_resource
-def load_model_pickle(path):
-    """Load pickle file with error handling"""
-    try:
-        if os.path.exists(path):
-            with open(path, "rb") as f:
-                return pickle.load(f)
-        # Try with joblib (more reliable)
-        if os.path.exists(path):
-            return joblib.load(path)
-    except Exception as e:
-        st.error(f"Error loading model: {e}")
-    return None
-@st.cache_resource
-def load_scaler_pickle(path):
-    """Load scaler pickle file"""
-    try:
-        if os.path.exists(path):
-            # Try standard pickle first
-            with open(path, "rb") as f:
-                return pickle.load(f)
-        # Try joblib
-        if os.path.exists(path):
-            return joblib.load(path)
-    except Exception as e:
-        st.error(f"Error loading scaler: {e}")
-    return None
-# Load data and models
 raw_df = load_csv(RAW_FEATURES_CSV)
-model = load_model_pickle(MODEL_PKL)
-scaler = load_scaler_pickle(SCALER_PKL)
-# Initialize session state for suggestions
 if 'suggestion_pools' not in st.session_state:
     st.session_state.suggestion_pools = {}
-# Build suggestion pools
 def build_pool_for_feature(feat):
-    """Create suggestion values for each feature"""
-    if raw_df is not None and feat in raw_df.columns:
         vals = raw_df[feat].dropna().unique().tolist()
-        if len(vals) > 0:
-            # Take first 5 unique values for suggestions
-            return vals[:5]
-    # Default values if CSV not loaded
-    if feat == 'Hour':
-        return list(range(0, 24))
-    elif feat in SUBMETER_COLS:
-        return [0.0, 1.0, 2.0, 5.0, 10.0]
-    elif 'Voltage' in feat:
-        return [230.0, 235.0, 240.0, 245.0, 250.0]
     else:
-        return [0.0, 0.5, 1.0, 1.5, 2.0]
-# Initialize suggestion pools
 for feat in FEATURES:
     st.session_state.suggestion_pools[feat] = build_pool_for_feature(feat)
 # Pre-fill sample input
-def generate_random_values():
-    """Generate random values for all features"""
     for feat, pool in st.session_state.suggestion_pools.items():
-        if len(pool) > 0:
             val = np.random.choice(pool)
-        else:
             val = 0 if feat == 'Hour' else 0.0
-        # Store in session state
         if feat == 'Hour':
-            st.session_state[f"input_{feat}"] = int(float(val))
         else:
-            st.session_state[f"input_{feat}"] = float(val)
-# Initialize random values if not exists
-if 'initialized' not in st.session_state:
-    generate_random_values()
-    st.session_state.initialized = True
-# UI Layout
-st.title("⚡ Household Power Consumption Prediction")
-st.markdown("Predict Global Active Power using Decision Tree Model")
-# Sidebar for info
-with st.sidebar:
-    st.header("ℹ️ Information")
-    st.markdown("""
-    **Features Used:**
-    - Global Reactive Power
-    - Voltage
-    - Sub-metering 1, 2, 3
-    - Daily averages
-    - Time features (Hour, Peak hours, Daytime)
-    """)
-    if model is not None:
-        st.success("✅ Decision Tree Model Loaded")
-    else:
-        st.error("❌ Model not loaded")
-    if scaler is not None:
-        st.success("✅ Scaler Loaded")
-    else:
-        st.error("❌ Scaler not loaded")
-# Generate Random Values Button
-col1, col2 = st.columns([1, 3])
-with col1:
-    if st.button("🎲 Generate Random Values", use_container_width=True):
-        generate_random_values()
-        st.rerun()
-# Input fields in columns
-st.header("📝 Input Features")
-cols = st.columns(2)
-input_values = {}
-for i, feat in enumerate(FEATURES):
     if feat in ['Is_peak_hour', 'Is_daytime']:
         continue
-    col = cols[i % 2]
     if feat == 'Hour':
-        default_val = st.session_state.get(f"input_{feat}", 12)
-        val = col.number_input(
-            "Hour (0-23)",
-            min_value=0,
-            max_value=23,
-            value=int(default_val),
-            step=1,
-            key=f"num_{feat}"
-        )
-        input_values[feat] = val
     else:
-        # Show suggestion from pool
-        suggestions = st.session_state.suggestion_pools.get(feat, [])
-        suggestion_text = ""
-        if suggestions:
-            suggestion_text = f"Suggestions: {', '.join([f'{s:.2f}' for s in suggestions[:3]])}"
-        default_val = st.session_state.get(f"input_{feat}", 0.0)
-        val = col.number_input(
-            f"{feat}",
-            value=float(default_val),
-            format="%.4f",
-            key=f"num_{feat}",
-            help=suggestion_text
-        )
-        input_values[feat] = val
-# Auto-calculate flags
-hour_val = input_values.get('Hour', 12)
-input_values['Is_daytime'] = 1 if (6 <= hour_val < 18) else 0
-input_values['Is_peak_hour'] = 1 if (17 <= hour_val <= 20) else 0
-# Display input preview
-st.markdown("### 📊 Input Preview")
-preview_df = pd.DataFrame([input_values])
-# Reorder columns to match FEATURES order
-preview_df = preview_df[FEATURES]
-st.dataframe(preview_df.style.format("{:.4f}"), use_container_width=True)
 st.markdown("---")
-# Prediction section
-st.header("🔮 Prediction")
-predict_col1, predict_col2 = st.columns([1, 3])
-with predict_col1:
-    predict_btn = st.button("🚀 Predict Global Active Power", type="primary", use_container_width=True)
 if predict_btn:
-    # Validate inputs
-    missing = [feat for feat in FEATURES
-               if feat not in input_values or input_values[feat] is None]
-    if missing:
-        st.error(f"❌ Missing values for: {', '.join(missing)}")
         st.stop()
-    # Check model and scaler
     if model is None:
-        st.error("❌ Model not loaded. Please check model file.")
         st.stop()
     if scaler is None:
-        st.error("❌ Scaler not loaded. Please check scaler file.")
         st.stop()
     try:
-        # Create input DataFrame
-        input_df = pd.DataFrame([input_values])
-        # Apply log1p to submeter columns
-        for col in SUBMETER_COLS:
-            if col in input_df.columns:
-                input_df[col] = np.log1p(input_df[col])
-        # Scale numeric features
-        if NUMERIC_COLS_TO_SCALE:
-            scaled_values = scaler.transform(input_df[NUMERIC_COLS_TO_SCALE])
-            input_df[NUMERIC_COLS_TO_SCALE] = scaled_values
-        # Prepare final feature set
-        X_input = input_df[FEATURES].values
-        # Make prediction
-        prediction = model.predict(X_input)[0]
-        # Display result
-        st.success(f"### Predicted Global Active Power: **{prediction:.6f}** kW")
-        # Additional info
-        with st.expander("📈 Prediction Details"):
-            st.markdown(f"""
-            **Model Used:** Decision Tree Regressor
-            **Input Features:** {len(FEATURES)} features
-            **Hour:** {hour_val}:00
-            **Is Daytime:** {'Yes' if input_values['Is_daytime'] else 'No'}
-            **Is Peak Hour:** {'Yes' if input_values['Is_peak_hour'] else 'No'}
-            """)
     except Exception as e:
-        st.error(f"❌ Prediction failed: {str(e)}")
-        st.info("Please check that all input values are valid numbers.")
-# Footer
-st.markdown("---")
-st.markdown("""
-<div style='text-align: center'>
-    <p>Built with ❤️ using Streamlit | Model: Decision Tree Regressor</p>
-</div>
-""", unsafe_allow_html=True)

 import numpy as np
 import os
 import pickle
+st.set_page_config(page_title="Household Power Consumption Prediction", layout="wide")
 # Hugging Face compatible paths
 RAW_FEATURES_CSV = "raw_features.csv"
+MODEL_PKL = "trained_models/decision_tree_model.pkl"  # Your uploaded model
 SCALER_PKL = "trained_models/scaler.pkl"
 FEATURES = [
 SUBMETER_COLS = ['Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3']
+# Load model and csv
 @st.cache_resource
 def load_csv(path):
+    if not os.path.exists(path):
+        return None
+    return pd.read_csv(path)
 @st.cache_resource
+def load_pickle(path):
+    if not os.path.exists(path):
+        return None
+    with open(path, "rb") as f:
+        return pickle.load(f)
 raw_df = load_csv(RAW_FEATURES_CSV)
+scaler = load_pickle(SCALER_PKL)
+model = load_pickle(MODEL_PKL)
+if raw_df is None:
+    st.error(f"raw_features.csv not found at: {RAW_FEATURES_CSV}")
+    st.stop()
+if model is None:
+    st.warning("Model not found or failed to load. Prediction will be disabled.")
+if scaler is None:
+    st.warning("Scaler not found or failed to load. Prediction will be disabled.")
+# Session defaults & pools
 if 'suggestion_pools' not in st.session_state:
     st.session_state.suggestion_pools = {}
+# Build suggestion
 def build_pool_for_feature(feat):
+    if feat in raw_df.columns:
         vals = raw_df[feat].dropna().unique().tolist()
+        if len(vals) == 0:
+            return [0.0]
+        return vals
     else:
+        if feat == 'Hour':
+            return list(range(0, 24))
+        elif feat in SUBMETER_COLS:
+            return [0.0, 1.0, 2.0, 5.0, 10.0]
+        else:
+            return [0.0, 1.0, 2.0, 3.0, 4.0]
 for feat in FEATURES:
     st.session_state.suggestion_pools[feat] = build_pool_for_feature(feat)
 # Pre-fill sample input
+def generate_custom_prefill():
     for feat, pool in st.session_state.suggestion_pools.items():
+        try:
             val = np.random.choice(pool)
+        except Exception:
             val = 0 if feat == 'Hour' else 0.0
         if feat == 'Hour':
+            st.session_state[f"cust_{feat}"] = int(float(val))
+            st.session_state[f"cust_txt_{feat}"] = str(int(float(val)))
         else:
+            st.session_state[f"cust_txt_{feat}"] = f"{float(val):.6f}"
+            st.session_state[f"cust_{feat}"] = float(val)
+# UI
+st.title("Household Power Consumption Prediction")
+if st.button("Generate Random values"):
+    generate_custom_prefill()
+    st.rerun()
+cols = st.columns(2)
+editable_values = {}
+i = 0
+for feat in FEATURES:
     if feat in ['Is_peak_hour', 'Is_daytime']:
         continue
+    colw = cols[i % 2]
+    i += 1
     if feat == 'Hour':
+        default_val = st.session_state.get(f"cust_{feat}", 9)
+        val = colw.number_input("Hour (0-23)", min_value=0, max_value=23, value=int(default_val), step=1, format="%d", key=f"cust_{feat}")
+        editable_values['Hour'] = int(val)
     else:
+        suggested = st.session_state.suggestion_pools.get(feat, [])
+        placeholder = ""
+        if len(suggested) > 0:
+            try:
+                placeholder = f" (e.g. {float(suggested[0]):.3f})"
+            except Exception:
+                placeholder = f" (e.g. {suggested[0]})"
+        default_txt = st.session_state.get(f"cust_txt_{feat}", "")
+        txt = colw.text_input(f"{feat}{placeholder}", value=default_txt, key=f"cust_txt_{feat}")
+        if txt.strip() == "":
+            editable_values[feat] = None
+        else:
+            try:
+                editable_values[feat] = float(txt)
+            except Exception:
+                colw.error("Invalid numeric value")
+                editable_values[feat] = None
+# auto flags
+h = int(editable_values.get('Hour', 0) if editable_values.get('Hour', 0) is not None else 0)
+editable_values['Is_daytime'] = 1 if (6 <= h < 18) else 0
+editable_values['Is_peak_hour'] = 1 if (17 <= h <= 20) else 0
+# Show all input columns in the preview
+st.markdown("### Custom input preview (all features + flags)")
+preview = {k: v for k, v in editable_values.items()}
+preview_df = pd.DataFrame([preview])
+cols_to_show = [c for c in FEATURES if c in preview_df.columns]
+st.dataframe(preview_df[cols_to_show], use_container_width=True)
 st.markdown("---")
+predict_btn = st.button("Predict Global Active Power")
+# Prediction logic
 if predict_btn:
+    # validate custom inputs
+    missing = [feat for feat in FEATURES if feat not in editable_values or (editable_values[feat] is None and feat not in ['Is_peak_hour','Is_daytime'])]
+    if len(missing) > 0:
+        st.error(f"Please fill values for: {missing}")
         st.stop()
+    row = editable_values.copy()
+    # ensure model & scaler present
     if model is None:
+        st.error("Model not loaded. Fix MODEL_PKL path.")
         st.stop()
     if scaler is None:
+        st.error("Scaler not loaded. Fix SCALER_PKL path.")
+        st.stop()
+    # Build DataFrame row and ensure all FEATURES present
+    row_df = pd.DataFrame([row], index=["user"])
+    for c in FEATURES:
+        if c not in row_df.columns:
+            if c == 'Is_daytime':
+                h = int(row_df['Hour'].iloc[0])
+                row_df[c] = 1 if (6 <= h < 18) else 0
+            elif c == 'Is_peak_hour':
+                h = int(row_df['Hour'].iloc[0])
+                row_df[c] = 1 if (17 <= h <= 20) else 0
+            else:
+                row_df[c] = 0.0
+    # Ensure numeric conversion
+    try:
+        row_df = row_df.astype(float)
+    except Exception:
+        st.error("Some inputs could not be converted to float — check your values.")
+        st.stop()
+    # Save raw copy (hide flags in preview)
+    raw_to_show = row_df[FEATURES].copy()
+    # Apply log1p to submeter columns
+    log_df = raw_to_show.copy()
+    for c in SUBMETER_COLS:
+        log_df[c] = np.log1p(log_df[c].astype(float))
+    # Scale numeric columns
+    try:
+        scaled_vals = scaler.transform(log_df[NUMERIC_COLS_TO_SCALE].values)
+    except Exception as e:
+        st.error(f"Scaler.transform failed: {e}")
         st.stop()
+    scaled_df = log_df.copy()
+    scaled_df.loc[:, NUMERIC_COLS_TO_SCALE] = scaled_vals
+    X_for_model = scaled_df[FEATURES].values
     try:
+        pred = model.predict(X_for_model)[0]
     except Exception as e:
+        st.error(f"Model prediction failed: {e}")
+        st.stop()
+    st.success(f"Predicted Global_active_power: **{pred:.6f}** (model units)")