"""Streamlit front end for the analytics validation demo.

Loads a CSV (uploaded through the sidebar, or ``data.csv`` next to the app),
passes it to ``run_checks`` from ``demo``, and renders the summary produced by
``generate_summary`` from ``llm_utils`` together with the detected issues and
the headline metrics.
"""

from pathlib import Path

import pandas as pd
import streamlit as st

# Try to import from your uploaded files.
# If they are missing, we will show a helpful error in the app.
try:
    from demo import run_checks, format_console_output
    from llm_utils import generate_summary
except ImportError as e:
    st.error(f"❌ Could not import modules: {e}")
    st.info("Make sure you have uploaded 'demo.py' and 'llm_utils.py' to your Space!")
    st.stop()

# ---------------------------------------------------------------------------
# Configuration & Setup
# ---------------------------------------------------------------------------
st.set_page_config(
    page_title="Analytics Validation Demo",
    page_icon="📊",
    layout="wide",
)

# File used when nothing has been uploaded through the sidebar.
DEFAULT_DATA_PATH = "data.csv"


# ---------------------------------------------------------------------------
# Data Loading Helper
# ---------------------------------------------------------------------------
@st.cache_data
def load_data_cached(filepath):
    """Load the dataset at *filepath* via ``demo.load_data``, cached by Streamlit.

    Args:
        filepath: Location of the CSV file, passed through to ``load_data``
            unchanged.

    Returns:
        Whatever ``demo.load_data`` returns (``main`` uses it as a DataFrame).
    """
    # Imported here rather than re-implemented so the loading logic stays in
    # one place (demo.py).
    from demo import load_data

    return load_data(filepath)


def _read_uploaded_csv(uploaded_file):
    """Parse an uploaded CSV into a DataFrame sorted by its ``date`` column.

    Args:
        uploaded_file: File-like object returned by ``st.file_uploader``.

    Returns:
        DataFrame ordered by ``date`` with a fresh 0..n-1 index.

    Raises:
        Any exception raised by pandas while reading or converting the file;
        the caller is expected to report it.
    """
    df = pd.read_csv(uploaded_file, parse_dates=["date"])
    # Convert explicitly when read_csv did not already yield a datetime dtype.
    if not pd.api.types.is_datetime64_any_dtype(df["date"]):
        df["date"] = pd.to_datetime(df["date"])
    return df.sort_values("date").reset_index(drop=True)


# ---------------------------------------------------------------------------
# UI Helpers
# ---------------------------------------------------------------------------
def display_issues(issues):
    """Render list of issues as Streamlit alerts.

    Args:
        issues: Sequence of dicts. Each one is read for ``severity``,
            ``type`` and ``detail``, plus the optional ``column`` and
            ``dates`` entries.
    """
    if not issues:
        st.success("✅ No issues detected. Data appears clean.")
        return

    st.warning(f"⚠️ Found {len(issues)} issues")

    for issue in issues:
        severity_icon = "🔴" if issue["severity"] == "ERROR" else "⚠️"
        with st.expander(f"{severity_icon} [{issue['severity']}] {issue['type']} in {issue.get('column', 'General')}"):
            st.write(f"**Detail:** {issue['detail']}")
            if issue.get("dates"):
                # str() each entry so non-string dates (e.g. Timestamps) do
                # not make join() raise; plain strings come out unchanged.
                st.write(f"**Affected Dates:** {', '.join(str(d) for d in issue['dates'])}")


def display_metrics(stats):
    """Render Key Metrics in columns.

    Args:
        stats: Mapping with ``revenue`` and ``orders`` entries, each a
            mapping that provides ``total``, ``mean`` and ``missing_count``.
    """
    st.subheader("Key Metrics")
    col1, col2 = st.columns(2)

    with col1:
        rev = stats["revenue"]
        st.metric("Total Revenue", f"${rev['total']:,.2f}")
        st.caption(f"Mean: ${rev['mean']:,.2f} | Missing: {rev['missing_count']}")

    with col2:
        ord_ = stats["orders"]
        st.metric("Total Orders", f"{int(ord_['total']):,}")
        st.caption(f"Mean: {ord_['mean']:,.0f} | Missing: {ord_['missing_count']}")

    st.divider()


# ---------------------------------------------------------------------------
# Main App Layout
# ---------------------------------------------------------------------------
def main():
    """Build the page: load the data, run the checks, and show the results."""
    st.title("📊 Analytics Validation Engine")
    st.markdown("""
    This demo validates daily business metrics for anomalies, missing data, and consistency errors.
    It uses a deterministic rule engine and an optional local LLM for narration.
    """)

    # Sidebar parameters
    st.sidebar.header("Configuration")
    uploaded_file = st.sidebar.file_uploader("Upload CSV", type=["csv"])

    # Without an upload the bundled default file is the only data source.
    if uploaded_file is None and not Path(DEFAULT_DATA_PATH).exists():
        st.error("❌ `data.csv` not found and no file was uploaded.")
        st.info("Please upload a CSV file in the sidebar or add `data.csv` to your Space files.")
        st.stop()

    # Load Data
    try:
        if uploaded_file is not None:
            df = _read_uploaded_csv(uploaded_file)
        else:
            df = load_data_cached(DEFAULT_DATA_PATH)
        st.sidebar.success(f"Loaded {len(df)} rows")
    except Exception as e:
        # UI boundary: report the failure on the page instead of a traceback.
        st.error(f"Error loading data: {e}")
        st.stop()

    # Run Analysis
    with st.spinner("Running validation rules..."):
        results = run_checks(df)

    # Display Findings
    # 1. Executive Summary
    st.header("Executive Summary")
    with st.spinner("Generating summary..."):
        summary = generate_summary(results)
    st.info(summary)

    # 2. Detailed Issues
    st.header("Detected Issues")
    display_issues(results["issues"])

    # 3. Data Overview
    st.header("Data Overview")
    display_metrics(results["stats"])

    with st.expander("View Raw Data"):
        st.dataframe(df)


if __name__ == "__main__":
    main()