# Streamlit front end for the analytics validation demo: loads a CSV of daily
# metrics, runs the rule checks from demo.py, and renders the findings.

from pathlib import Path

import pandas as pd
import streamlit as st

from demo import load_data, run_checks, format_console_output
from llm_utils import generate_summary

# ---------------------------------------------------------------------------
# Configuration & Setup
# ---------------------------------------------------------------------------
st.set_page_config(
    page_title="Analytics Validation Demo",
    page_icon="📊",
    layout="wide",
)

# File used when the user has not uploaded anything in the sidebar.
DEFAULT_DATA_PATH = "data.csv"


# ---------------------------------------------------------------------------
# Helper Functions
# ---------------------------------------------------------------------------
@st.cache_data
def load_and_verify_data(filepath):
    """Load data using the existing logic from demo.py.

    Args:
        filepath: Location of the CSV, passed straight to ``load_data``.

    Returns:
        Whatever ``load_data`` returns for ``filepath``.
    """
    # NOTE(review): the result is cached per ``filepath`` by st.cache_data, so
    # edits to the file on disk are presumably not picked up until the cache
    # is cleared -- confirm that is acceptable for this demo.
    return load_data(filepath)


def _read_uploaded_csv(uploaded_file):
    """Parse an uploaded CSV into a frame sorted by its ``date`` column.

    Args:
        uploaded_file: Seekable file-like object holding CSV content.

    Returns:
        A DataFrame ordered by ``date`` with a fresh 0..n-1 index.

    Raises:
        Exception: Whatever pandas raises for a missing ``date`` column or
            for values that cannot be converted to datetimes.
    """
    # Rewind first so a buffer that was already consumed is read from the
    # start instead of yielding no data.
    uploaded_file.seek(0)
    df = pd.read_csv(uploaded_file, parse_dates=["date"])

    # read_csv can leave the column as plain objects when parsing fails;
    # converting explicitly surfaces the problem as an exception here.
    if not pd.api.types.is_datetime64_any_dtype(df["date"]):
        df["date"] = pd.to_datetime(df["date"])

    return df.sort_values("date").reset_index(drop=True)


def display_issues(issues):
    """Render a list of issues as Streamlit alerts.

    Args:
        issues: Iterable of dicts. Each must provide ``severity``, ``type``
            and ``detail``; ``column`` and ``dates`` are optional.
    """
    if not issues:
        st.success("✅ No issues detected. Data appears clean.")
        return

    st.warning(f"⚠️ Found {len(issues)} issues")

    for issue in issues:
        severity_icon = "🔴" if issue["severity"] == "ERROR" else "⚠️"
        title = (
            f"{severity_icon} [{issue['severity']}] {issue['type']} "
            f"in {issue.get('column', 'General')}"
        )
        with st.expander(title):
            st.write(f"**Detail:** {issue['detail']}")
            if issue.get("dates"):
                # Stringify each entry: str.join raises TypeError on anything
                # that is not already a str (e.g. Timestamp or date objects).
                affected = ", ".join(str(d) for d in issue["dates"])
                st.write(f"**Affected Dates:** {affected}")


def display_metrics(stats):
    """Render the key metrics in two columns.

    Args:
        stats: Mapping with ``revenue`` and ``orders`` entries, each of which
            provides ``total``, ``mean`` and ``missing_count``.
    """
    st.subheader("Key Metrics")
    col1, col2 = st.columns(2)

    with col1:
        rev = stats["revenue"]
        st.metric("Total Revenue", f"${rev['total']:,.2f}")
        st.caption(f"Mean: ${rev['mean']:,.2f} | Missing: {rev['missing_count']}")

    with col2:
        ord_ = stats["orders"]
        st.metric("Total Orders", f"{int(ord_['total']):,}")
        st.caption(f"Mean: {ord_['mean']:,.0f} | Missing: {ord_['missing_count']}")

    st.divider()


# ---------------------------------------------------------------------------
# Main App Layout
# ---------------------------------------------------------------------------
def main():
    """Build the page: load the data, run the checks, and show the results."""
    st.title("📊 Analytics Validation Engine")
    st.markdown(
        """
        This demo validates daily business metrics for anomalies, missing data,
        and consistency errors. It uses a deterministic rule engine and an
        optional local LLM for narration.
        """
    )

    # Sidebar parameters
    st.sidebar.header("Configuration")
    uploaded_file = st.sidebar.file_uploader("Upload CSV", type=["csv"])

    # Without an upload the bundled default file is the only data source.
    if uploaded_file is None and not Path(DEFAULT_DATA_PATH).exists():
        st.error("❌ data.csv not found and no file uploaded.")
        st.stop()

    # Load Data
    try:
        if uploaded_file is not None:
            # Uploads are read with pandas directly and do not go through
            # load_data from demo.py.
            df = _read_uploaded_csv(uploaded_file)
        else:
            df = load_and_verify_data(DEFAULT_DATA_PATH)
    except Exception as e:
        # Top-level UI boundary: report any load failure on the page and halt
        # this run rather than showing a traceback.
        st.error(f"Error loading data: {e}")
        st.stop()
    else:
        st.sidebar.success(f"Loaded {len(df)} rows")

    # Run Analysis
    with st.spinner("Running validation rules..."):
        results = run_checks(df)

    # 1. Executive Summary
    st.header("Executive Summary")
    # Generated synchronously for now; streaming is a possible later change.
    with st.spinner("Generating summary (LLM or Rule-based)..."):
        summary = generate_summary(results)
    st.info(summary)

    # 2. Detailed Issues
    st.header("Detected Issues")
    display_issues(results["issues"])

    # 3. Data Overview
    st.header("Data Overview")
    display_metrics(results["stats"])

    with st.expander("View Raw Data"):
        st.dataframe(df)


if __name__ == "__main__":
    main()