# ---------------------------------------------------------------------------
# Eagle Blend Optimizer — Streamlit application.
#
# NOTE(review): this file was recovered from a whitespace-mangled copy. All
# HTML/CSS fragments passed to st.markdown(..., unsafe_allow_html=True) had
# their markup stripped in that copy; the visible text content is preserved
# below, but the original tags/classes/styles must be restored from a clean
# checkout before the UI will render as designed.
# ---------------------------------------------------------------------------

# --- standard library ---
import math
import re
import sqlite3
import time
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Dict, Optional

# --- third-party ---
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go  # was imported twice in the original; once is enough
import streamlit as st
from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.core.callback import Callback
from pymoo.core.problem import Problem
from pymoo.core.repair import Repair
from pymoo.optimize import minimize

## -------------------------------------------------------------------------
## Functions
## -------------------------------------------------------------------------


class OptimizationProgressCallback(Callback):
    """pymoo callback that mirrors optimization progress into a Streamlit bar."""

    def __init__(self, progress_bar, total_generations):
        super().__init__()
        self.progress_bar = progress_bar            # handle returned by st.progress(...)
        self.total_generations = total_generations  # generations requested from minimize()

    def notify(self, algorithm):
        # Clamp to [0, 1]: st.progress raises for values above 1.0, and pymoo
        # can report n_gen beyond the requested generation count on the final
        # notification.  (FIX: original could pass a value > 1.0.)
        progress_percent = min(1.0, algorithm.n_gen / self.total_generations)
        progress_text = f"Optimizing... {progress_percent:.0%}"
        self.progress_bar.progress(progress_percent, text=progress_text)


class NormalizationRepair(Repair):
    """Ensures the first 5 variables (fractions) sum to 1."""

    def _do(self, problem, X, **kwargs):
        fractions = X[:, :5]
        row_sums = np.sum(fractions, axis=1, keepdims=True)
        # Guard against an all-zero row so we never divide by zero.
        row_sums[row_sums == 0] = 1
        X[:, :5] = fractions / row_sums
        return X


class BlendOptimizationProblem(Problem):
    """pymoo problem: find 5 component fractions matching target blend properties.

    Decision variables are the 5 component fractions (bounded [0, 1] and
    normalized by NormalizationRepair).  The remaining 50 model inputs
    (component properties) are fixed and tiled onto every candidate.

    Objectives:
        1. squared error between predicted and target properties (always), and
        2. blend cost (only when ``optimize_cost`` is True).

    Constraints: each "frozen" target property must be predicted within
    ``epsilon`` of its requested value.
    """

    def __init__(self, blend_model, target_properties, fixed_inputs, frozen_targets,
                 input_columns, output_columns, optimize_cost=False):
        self.blend_model = blend_model
        self.target_properties = target_properties
        self.fixed_inputs = fixed_inputs
        self.frozen_targets = frozen_targets
        self.input_columns = input_columns
        self.output_columns = output_columns  # columns returned by predict_fast
        self.optimize_cost = optimize_cost

        n_constraints = len(self.frozen_targets)
        n_obj = 2 if self.optimize_cost else 1
        # pymoo needs at least one constraint slot; a dummy all-zero "G" is
        # emitted in _evaluate when nothing is frozen.
        super().__init__(n_var=5, n_obj=n_obj,
                         n_constr=n_constraints if n_constraints > 0 else 1,
                         xl=0.0, xu=1.0)

        self.epsilon = 1e-3  # tolerance for frozen-property constraints
        # Column positions (within output_columns) of the frozen properties,
        # and their requested values, aligned with each other.
        self.frozen_indices_predicted = [
            self.output_columns.index(f'BlendProperty{k}')
            for k in self.frozen_targets.keys()
            if f'BlendProperty{k}' in self.output_columns
        ]
        self.frozen_values = np.array([
            v for k, v in self.frozen_targets.items()
            if f'BlendProperty{k}' in self.output_columns
        ])
        # Mask selecting the non-frozen predicted columns used in the error
        # objective.
        self.objective_mask_predicted = np.ones(len(self.output_columns), dtype=bool)
        if self.frozen_indices_predicted:
            self.objective_mask_predicted[self.frozen_indices_predicted] = False
        # Target values for the masked (non-frozen) columns, in column order.
        all_output_prop_indices = [
            int(col.replace('BlendProperty', '')) for col in self.output_columns
        ]
        self.objective_targets = np.array([
            self.target_properties[prop_idx - 1]
            for i, prop_idx in enumerate(all_output_prop_indices)
            if self.objective_mask_predicted[i]
        ])

    def _evaluate(self, x, out, *args, **kwargs):
        # Assemble the full (n_candidates, 55) model input: candidate
        # fractions followed by the tiled fixed component properties.
        fixed_data = np.tile(self.fixed_inputs, (len(x), 1))
        full_input_data = np.hstack([x, fixed_data])
        input_df = pd.DataFrame(full_input_data, columns=self.input_columns)

        # STEP 1: predict_fast is the cheap surrogate used inside the loop.
        predicted_properties = self.blend_model.predict_fast(input_df)[self.output_columns].values

        error = np.sum(
            (predicted_properties[:, self.objective_mask_predicted] - self.objective_targets) ** 2,
            axis=1,
        )

        if self.optimize_cost:
            # Per-component unit costs come from the Streamlit UI state.
            component_costs = np.array(
                [st.session_state.get(f"opt_c{i}_cost", 0.0) for i in range(5)]
            )
            cost = input_df.iloc[:, :5].values @ component_costs
            out["F"] = np.column_stack([error, cost])
        else:
            out["F"] = error

        if self.frozen_targets and self.frozen_indices_predicted:
            # Constraint satisfied (<= 0) when |predicted - frozen| <= epsilon.
            frozen_violations = (
                np.abs(predicted_properties[:, self.frozen_indices_predicted] - self.frozen_values)
                - self.epsilon
            )
            out["G"] = frozen_violations
        else:
            out["G"] = np.zeros(len(x))


def run_real_optimization(targets, fixed_targets, components_data, include_cost,
                          generations, pop_size, progress_bar):
    """Run the NSGA-II blend optimization and return solutions sorted by error.

    Args:
        targets: dict mapping 'Property1'..'Property10' to target values.
        fixed_targets: dict of frozen targets, same key style as ``targets``.
        components_data: accepted for interface compatibility (currently unused).
        include_cost: when True, optimize blend cost as a second objective.
        generations: number of NSGA-II generations.
        pop_size: NSGA-II population size.
        progress_bar: Streamlit progress-bar handle updated via callback.

    Returns:
        List of solution dicts with keys ``component_fractions``,
        ``blend_properties`` (10 values), ``error`` and ``optimized_cost``,
        sorted ascending by error.  Empty list when no feasible solution.
    """
    # 1. SETUP: the full 55-column model input layout (5 fractions + 50
    # component properties) and the fixed property values from the UI.
    blend_model = st.session_state.predictor
    input_cols = [f'Component{i+1}_fraction' for i in range(5)]
    for j in range(1, 11):
        for i in range(1, 6):
            input_cols.append(f'Component{i}_Property{j}')
    fixed_model_inputs = []
    for j in range(1, 11):
        for i in range(5):
            # Session keys are 0-based (opt_c0..opt_c4); columns are 1-based.
            fixed_model_inputs.append(st.session_state.get(f"opt_c{i}_prop{j}", 0.0))
    fixed_model_inputs = np.array(fixed_model_inputs)

    # The optimization loop only scores these 6 properties via predict_fast.
    output_cols_fast = [f'BlendProperty{i}' for i in [1, 2, 5, 6, 7, 10]]
    full_target_properties = np.array(list(targets.values()))
    frozen_targets_to_use = {
        int(k.replace('Property', '')): v for k, v in fixed_targets.items()
    }

    # 2. RUN OPTIMIZATION
    problem = BlendOptimizationProblem(
        blend_model=blend_model, target_properties=full_target_properties,
        fixed_inputs=fixed_model_inputs, frozen_targets=frozen_targets_to_use,
        input_columns=input_cols, output_columns=output_cols_fast,
        optimize_cost=include_cost,
    )
    algorithm = NSGA2(pop_size=pop_size, repair=NormalizationRepair(),
                      eliminate_duplicates=True)
    callback = OptimizationProgressCallback(progress_bar, generations)
    res = minimize(problem, algorithm, termination=('n_gen', generations),
                   seed=1, verbose=False, callback=callback)

    # 3. PROCESS AND RETURN RESULTS
    if res.X is None or len(res.X) == 0:
        st.error("Optimization failed to find a feasible solution. Consider relaxing your constraints.")
        return []

    # No predict_all() here: the final property list is assembled manually
    # from one predict_fast call on the winning fractions.
    final_fractions_df = pd.DataFrame(
        res.X, columns=[f'Component{i+1}_fraction' for i in range(5)]
    )
    fixed_df_part = pd.DataFrame(
        [fixed_model_inputs] * len(final_fractions_df), columns=input_cols[5:]
    )
    full_input_for_fast_pred = pd.concat([final_fractions_df, fixed_df_part], axis=1)
    predicted_6_properties_df = blend_model.predict_fast(full_input_for_fast_pred)

    solutions = []
    optimized_prop_indices = [1, 2, 5, 6, 7, 10]
    for i in range(len(res.X)):
        # Build the 10-element property array expected by the UI.
        final_10_properties = np.zeros(10)
        for prop_idx in range(1, 11):
            if prop_idx in optimized_prop_indices:
                # Optimized properties: take the predict_fast value.
                col_name = f'BlendProperty{prop_idx}'
                final_10_properties[prop_idx - 1] = predicted_6_properties_df[col_name].iloc[i]
            else:
                # Non-optimized properties: echo the user's target as a placeholder.
                final_10_properties[prop_idx - 1] = targets[f'Property{prop_idx}']
        solutions.append({
            "component_fractions": res.X[i],
            "blend_properties": final_10_properties,
            "error": res.F[i][0],
            "optimized_cost": res.F[i][1] if include_cost else 0.0,
        })

    solutions.sort(key=lambda s: s.get('error', float('inf')))
    return solutions


def calculate_quality_score(error, tolerance=1e-3):
    """Map a squared error onto a 0-100 quality score.

    Returns 0.0 for missing/over-tolerance errors, approaching 100 as the
    error approaches zero.
    """
    if error is None or tolerance <= 0:
        return 0.0
    # If error is at/above tolerance, score is 0 (also avoids log of <= 0).
    if error >= tolerance:
        return 0.0
    ratio = error / tolerance
    # Guard against ratio reaching 1.0 through floating-point rounding.
    if ratio >= 1.0:
        return 0.0
    try:
        # Core formula; clamped to [0, 100].
        score = 100 * (1 / (1 + math.log(1 - ratio)))
        return max(0, min(100, score))
    except (ValueError, TypeError, ZeroDivisionError):
        # FIX: ZeroDivisionError added — 1 + log(1 - ratio) hits exactly zero
        # near ratio ~= 0.632 and was previously uncaught.
        return 0.0


@st.cache_data
def get_all_blends_data(db_path="eagleblend.db") -> pd.DataFrame:
    """Fetches all blend data, sorted by the most recent entries."""
    with sqlite3.connect(db_path) as conn:
        # Assuming 'id' is the primary key indicating recency.
        return pd.read_sql_query("SELECT * FROM blends ORDER BY id DESC", conn)


def filter_component_options(df: pd.DataFrame, component_index: int) -> list:
    """
    Filters component options for a dropdown.

    - Primary filter: by 'component_type' matching the component index + 1.
    - Fallback filter: by 'component_name' ending with '_Component_{index+1}'.
    """
    target_type = component_index + 1

    # Primary filter: use 'component_type' if the column exists and has data.
    if 'component_type' in df.columns and not df['component_type'].isnull().all():
        # .loc avoids SettingWithCopyWarning on the slice.
        filtered_df = df.loc[df['component_type'] == target_type]
        if not filtered_df.empty:
            return filtered_df['component_name'].tolist()

    # Fallback filter: match by name suffix; na=False tolerates null names.
    fallback_df = df.loc[
        df['component_name'].str.endswith(f"_Component_{target_type}", na=False)
    ]
    return fallback_df['component_name'].tolist()


# ---------------------- Page Config ----------------------
st.set_page_config(
    layout="wide",
    page_title="Eagle Blend Optimizer",
    page_icon="🦅",
    initial_sidebar_state="collapsed",
)

# ---------------------- Sidebar Content ----------------------
with st.sidebar:
    st.markdown("---")
    st.markdown("### 🦅 Developed by eagle-team")
    st.markdown("""
    - Destiny Otto
    - Williams Alabi
    - Godswill Otto
    - Alexander Ifenaike
    """)
    st.markdown("---")
    st.info("Select a tab above to get started.")

# ---------------------- Custom Styling ----------------------
# NOTE(review): the original <style> CSS block was stripped from this copy of
# the source — restore it from a clean checkout.
st.markdown(""" """, unsafe_allow_html=True)

# ---------------------- App Header ----------------------
# NOTE(review): original header HTML wrappers were lost; text content kept.
st.markdown("""
🦅 Eagle Blend Optimizer

AI-Powered Fuel Blend Property Prediction & Optimization

by eagle-team for the Shell.ai 2025 Hackathon
""", unsafe_allow_html=True)

# ---------------------- Tabs ----------------------
tabs = st.tabs([
    "📊 Dashboard",
    "🎛️ Blend Designer",
    "⚙️ Optimization Engine",
    "📤 Blend Comparison",
    "📚 Fuel Registry",
    "🧠 Model Insights",
])


def explode_blends_to_components(blends_df: pd.DataFrame,
                                 n_components: int = 5,
                                 keep_empty: bool = False,
                                 blend_name_col: str = "blend_name") -> pd.DataFrame:
    """
    Convert a blends DataFrame into a components DataFrame.

    Parameters
    ----------
    blends_df : pd.DataFrame
        DataFrame with columns following the pattern:
        Component1_fraction, Component1_Property1..Property10,
        Component1_unit_cost, ...
    n_components : int
        Number of components per blend (default 5).
    keep_empty : bool
        Accepted for interface compatibility; currently unused.
    blend_name_col : str
        Column name in blends_df that stores the blend name.

    Returns
    -------
    pd.DataFrame
        components_df with columns:
        ['blend_name', 'component_name', 'component_fraction',
         'component_type', 'unit_cost', 'property1', ..., 'property10']
    """
    components_rows = []

    for _, blend_row in blends_df.iterrows():
        blend_name = blend_row.get(blend_name_col)
        # Fallback if blend_name is missing/empty: index-based name.
        if not blend_name or str(blend_name).strip() == "":
            blend_name = f"blend{int(blend_row.name) + 1}"

        for i in range(1, n_components + 1):
            frac_col = f"Component{i}_fraction"
            unit_cost_col = f"Component{i}_unit_cost"
            prop_cols = [f"Component{i}_Property{j}" for j in range(1, 11)]

            # Safely get values (missing columns yield NaN).
            row = {
                "blend_name": blend_name,
                "component_name": f"{blend_name}_Component_{i}",
                "component_fraction": blend_row.get(frac_col, np.nan),
                "component_type": i,
                "unit_cost": blend_row.get(unit_cost_col, np.nan),
            }
            for j, pc in enumerate(prop_cols, start=1):
                row[f"property{j}"] = blend_row.get(pc, np.nan)
            components_rows.append(row)

    return pd.DataFrame(components_rows)


# --- Updated add_blends (now also populates components) ---
def add_blends(df, db_path="eagleblend.db", n_components=5):
    """Insert blend rows (auto-naming blanks) and their exploded components."""
    df = df.copy()

    # 1) Normalize the blend_name column name.
    for col in list(df.columns):
        if col.strip().lower() in ("blend_name", "blend name", "blendname"):
            if col != "blend_name":
                df = df.rename(columns={col: "blend_name"})
            break
    if "blend_name" not in df.columns:
        df["blend_name"] = pd.NA

    conn = sqlite3.connect(db_path)
    cur = conn.cursor()

    # 2) Determine next blend number from existing 'blendN' names.
    cur.execute("SELECT blend_name FROM blends WHERE blend_name LIKE 'blend%'")
    nums = [int(m.group(1)) for (b,) in cur.fetchall()
            if (m := re.match(r"blend(\d+)$", str(b)))]
    start_num = max(nums) if nums else 0

    # 3) Fill missing blend_name values with sequential names.
    mask = df["blend_name"].isna() | (df["blend_name"].astype(str).str.strip() == "")
    df.loc[mask, "blend_name"] = [
        f"blend{i}" for i in range(start_num + 1, start_num + 1 + mask.sum())
    ]

    # 4) Safe insert into blends: only columns that exist in the table.
    cur.execute("PRAGMA table_info(blends)")
    db_cols = [r[1] for r in cur.fetchall()]
    safe_df = df[[c for c in df.columns if c in db_cols]]
    if not safe_df.empty:
        safe_df.to_sql("blends", conn, if_exists="append", index=False)

    # 5) Explode blends into components and insert into components table.
    components_df = explode_blends_to_components(df, n_components=n_components,
                                                 keep_empty=False)
    cur.execute("PRAGMA table_info(components)")
    comp_cols = [r[1] for r in cur.fetchall()]
    safe_components_df = components_df[[c for c in components_df.columns if c in comp_cols]]
    if not safe_components_df.empty:
        safe_components_df.to_sql("components", conn, if_exists="append", index=False)

    conn.commit()
    conn.close()
    return {
        "blends_inserted": int(safe_df.shape[0]),
        "components_inserted": int(safe_components_df.shape[0]),
    }


# --- add_components function ---
def add_components(df, db_path="eagleblend.db"):
    """Insert component rows, auto-naming any blank component_name values."""
    df = df.copy()

    # Normalize blend_name column name.
    for col in list(df.columns):
        if col.strip().lower() in ("blend_name", "blend name", "blendname"):
            if col != "blend_name":
                df = df.rename(columns={col: "blend_name"})
            break
    if "blend_name" not in df.columns:
        df["blend_name"] = pd.NA

    # Ensure component_name exists.
    if "component_name" not in df.columns:
        df["component_name"] = pd.NA

    conn = sqlite3.connect(db_path)
    cur = conn.cursor()

    # Fill missing component_name values.
    # FIX: generate names only for the masked rows — the original enumerated
    # the whole column, so the assignment raised a length-mismatch ValueError
    # whenever some rows already had names.
    mask = df["component_name"].isna() | (df["component_name"].astype(str).str.strip() == "")
    df.loc[mask, "component_name"] = [
        f"{bn}_Component_{i+1}"
        for i, bn in enumerate(df.loc[mask, "blend_name"].fillna("blend_unknown"))
    ]

    # Safe insert into components: only columns that exist in the table.
    cur.execute("PRAGMA table_info(components)")
    db_cols = [r[1] for r in cur.fetchall()]
    safe_df = df[[c for c in df.columns if c in db_cols]]
    if not safe_df.empty:
        safe_df.to_sql("components", conn, if_exists="append", index=False)

    conn.commit()
    conn.close()
    return int(safe_df.shape[0])


def get_blends_overview(db_path: str = "eagleblend.db", last_n: int = 5) -> Dict[str, Any]:
    """
    Returns:
        {
          "max_saving": float | None,   # raw numeric (PreOpt_Cost - Optimized_Cost)
          "last_blends": pandas.DataFrame,  # last_n rows of selected columns
          "daily_counts": pandas.Series     # counts per day, index = 'YYYY-MM-DD'
        }
    """
    last_n = int(last_n)  # int-coerce before interpolating into LIMIT

    comp_cols = [
        "blend_name",
        "Component1_fraction", "Component2_fraction", "Component3_fraction",
        "Component4_fraction", "Component5_fraction",
        "created_at",
    ]
    blend_props = [f"BlendProperty{i}" for i in range(1, 11)]
    cols_sql = ", ".join(comp_cols + blend_props)

    with sqlite3.connect(db_path) as conn:
        # 1) scalar: max saving
        max_saving = conn.execute(
            "SELECT MAX(PreOpt_Cost - Optimized_Cost) "
            "FROM blends "
            "WHERE PreOpt_Cost IS NOT NULL AND Optimized_Cost IS NOT NULL"
        ).fetchone()[0]

        # 2) last N rows (only selected columns)
        q_last = f"""
            SELECT {cols_sql}
            FROM blends
            ORDER BY id DESC
            LIMIT {last_n}
        """
        df_last = pd.read_sql_query(q_last, conn)

        # 3) daily counts (group by date)
        q_counts = """
            SELECT date(created_at) AS day, COUNT(*) AS cnt
            FROM blends
            WHERE created_at IS NOT NULL
            GROUP BY day
            ORDER BY day DESC
        """
        df_counts = pd.read_sql_query(q_counts, conn)

    # Convert counts to a Series with day strings as index.
    if not df_counts.empty:
        daily_counts = pd.Series(df_counts["cnt"].values,
                                 index=df_counts["day"].astype(str))
        daily_counts.index.name = "day"
        daily_counts.name = "count"
    else:
        daily_counts = pd.Series(dtype=int, name="count")

    return {"max_saving": max_saving, "last_blends": df_last,
            "daily_counts": daily_counts}


def get_activity_logs(db_path="eagleblend.db", timeframe="today", activity_type=None):
    """
    Get counts of activities from the activity_log table within a timeframe.

    Args:
        db_path (str): Path to the SQLite database file.
        timeframe (str): 'today', 'this_week', or 'this_month'.
        activity_type (str): Specific activity type to return the count for.
            If None, return all counts.

    Returns:
        dict: counts per activity type, OR a single int if activity_type given.

    Raises:
        ValueError: for an unrecognized timeframe.
    """
    now = datetime.now()
    if timeframe == "today":
        start_time = now.replace(hour=0, minute=0, second=0, microsecond=0)
    elif timeframe == "this_week":
        # Monday of this week, at midnight.
        start_time = (now - timedelta(days=now.weekday())).replace(
            hour=0, minute=0, second=0, microsecond=0)
    elif timeframe == "this_month":
        start_time = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
    else:
        raise ValueError("Invalid timeframe. Use 'today', 'this_week', or 'this_month'.")

    conn = sqlite3.connect(db_path)
    query = """
        SELECT activity_type, COUNT(*) as count
        FROM activity_log
        WHERE timestamp >= ?
        GROUP BY activity_type
    """
    df_counts = pd.read_sql_query(
        query, conn, params=(start_time.strftime("%Y-%m-%d %H:%M:%S"),))
    conn.close()

    counts_dict = dict(zip(df_counts["activity_type"], df_counts["count"]))
    if activity_type:
        return counts_dict.get(activity_type, 0)
    return counts_dict


def get_model(db_path="eagleblend.db"):
    """
    Fetch the last model from the models_registry table.

    Returns:
        pandas.Series | None: the most recent model row, or None if empty.
    """
    conn = sqlite3.connect(db_path)
    df_last = pd.read_sql_query(
        "SELECT * FROM models_registry ORDER BY id DESC LIMIT 1", conn)
    conn.close()
    # Return a Series so callers can access columns directly.
    return df_last.iloc[0] if not df_last.empty else None


# ---------------------------------------------------------------------------
# Dashboard Tab
# ---------------------------------------------------------------------------
with tabs[0]:
    # ---------- formatting helpers ----------
    def fmt_int(x):
        """Thousands-separated integer; '0' on any conversion failure."""
        try:
            return f"{int(x):,}"
        except Exception:
            return "0"

    def fmt_pct_from_r2(r2):
        """Render an R² value as a percentage; em dash when unavailable."""
        if r2 is None:
            return "—"
        try:
            v = float(r2)
            # Values <= 1.5 are assumed to be raw R² fractions, not percents.
            if v <= 1.5:
                v *= 100.0
            return f"{v:.1f}%"
        except Exception:
            return "—"

    def fmt_currency(x):
        """Render a dollar amount; em dash on any conversion failure."""
        try:
            return f"${float(x):,.2f}"
        except Exception:
            return "—"

    # ---------- pull live data (this_week only) ----------
    try:
        last_model = get_model()
    except Exception as e:
        last_model = None
        st.warning(f"Model lookup failed: {e}")

    try:
        activity_counts = get_activity_logs(timeframe="this_week")
    except Exception as e:
        activity_counts = {}
        st.warning(f"Activity log lookup failed: {e}")

    try:
        overview = get_blends_overview(last_n=5)
    except Exception as e:
        overview = {"max_saving": None, "last_blends": pd.DataFrame(),
                    "daily_counts": pd.Series(dtype=int)}
        st.warning(f"Blends overview failed: {e}")

    r2_display = fmt_pct_from_r2(None if last_model is None else last_model.get("R2_Score"))
    preds = fmt_int(activity_counts.get("prediction", 0))
    opts = fmt_int(activity_counts.get("optimization", 0))
    max_saving_display = fmt_currency(overview.get("max_saving", None))

    # ---------- KPI cards ----------
    # NOTE(review): the KPI cards were HTML blocks rendered with
    # unsafe_allow_html=True; the markup was stripped from this copy and only
    # the text survives.  Restore the div/class wrappers from a clean checkout.
    st.markdown('Performance Summary', unsafe_allow_html=True)
    k1, k2, k3, k4 = st.columns(4)
    with k1:
        st.markdown(f"""Model Accuracy
{r2_display}
R² (latest)""", unsafe_allow_html=True)
    with k2:
        st.markdown(f"""Predictions Made
{preds}
This Week""", unsafe_allow_html=True)
    with k3:
        st.markdown(f"""Optimizations
{opts}
This Week""", unsafe_allow_html=True)
    with k4:
        st.markdown(f"""Highest Cost Savings
{max_saving_display}
Per unit fuel""", unsafe_allow_html=True)
    st.markdown('', unsafe_allow_html=True)

    # NOTE(review): a commented-out floating "How to Use" CSS/HTML block
    # followed here in the original; it was truncated in this copy and has
    # been removed as dead code.