"""Streamlit demo app for quick data-quality checks.

The page has three tabs:

* upload a CSV, see a small QA report, apply quick fixes and download the result;
* fetch a single exchange rate from Alpha Vantage and sanity-check it;
* show a before/after cleaning demo on a built-in sample frame.
"""
import os
from io import BytesIO

import numpy as np
import pandas as pd
import requests
import streamlit as st

# -------------------------------
# PAGE CONFIG
# -------------------------------
st.set_page_config(page_title="🚀 Data QA Demo", layout="wide")
st.title("🚀 AI‑Powered Data Quality Checker")
st.markdown("Validate, clean, and showcase datasets in real‑time.")

# Read Alpha Vantage key from Hugging Face Secrets
ALPHA_KEY = os.getenv("ALPHA_VANTAGE_API_KEY")
ALPHA_VANTAGE_URL = "https://www.alphavantage.co/query"


# -------------------------------
# UTILITIES
# -------------------------------
def qa_report(df: pd.DataFrame) -> dict:
    """Summarise basic quality indicators of *df*.

    Returns a dict with the keys ``"Rows"``, ``"Columns"``,
    ``"Missing Values"`` (null cells over the whole frame),
    ``"Duplicate Rows"`` (rows flagged by ``DataFrame.duplicated``) and
    ``"Negative Values"`` (cells below zero in numeric columns only).
    """
    numeric = df.select_dtypes(include=[np.number])
    report = {
        "Rows": df.shape[0],
        "Columns": df.shape[1],
        "Missing Values": int(df.isnull().sum().sum()),
        "Duplicate Rows": int(df.duplicated().sum()),
        "Negative Values": int((numeric < 0).sum().sum()),
    }
    return report


def _redact_key(message) -> str:
    """Return ``str(message)`` with the API key masked so it never reaches the UI."""
    text = str(message)
    return text.replace(ALPHA_KEY, "***") if ALPHA_KEY else text


def fetch_alpha_vantage(symbol="BTC", market="USD"):
    """Fetch the current ``symbol``/``market`` exchange rate from Alpha Vantage.

    Args:
        symbol: Currency code to convert from.
        market: Currency code to convert to.

    Returns:
        ``(rate, None)`` on success, where ``rate`` is a float, or
        ``(None, message)`` when the key is missing, the input is blank,
        the request fails, or the response does not contain a rate.
        Never raises for request or response-format errors.
    """
    if not ALPHA_KEY:
        return None, "ALPHA_VANTAGE_API_KEY not set. Add it in Settings → Secrets."

    symbol = (symbol or "").strip()
    market = (market or "").strip()
    if not symbol or not market:
        return None, "API error: symbol and market currency must not be empty."

    # Pass user-typed values via `params` so requests URL-encodes them; building
    # the query string by hand would let input such as "BTC&function=..." inject
    # extra parameters.
    params = {
        "function": "CURRENCY_EXCHANGE_RATE",
        "from_currency": symbol,
        "to_currency": market,
        "apikey": ALPHA_KEY,
    }
    try:
        r = requests.get(ALPHA_VANTAGE_URL, params=params, timeout=15)
        r.raise_for_status()
        data = r.json()
    except (requests.RequestException, ValueError) as e:
        # requests includes the full URL (with apikey=...) in its error text,
        # so the message is redacted before it is shown to the user.
        return None, f"API error: {_redact_key(e)}"

    rate_info = data.get("Realtime Currency Exchange Rate") if isinstance(data, dict) else None
    if not isinstance(rate_info, dict):
        # NOTE(review): Alpha Vantage appears to report bad symbols and rate
        # limiting inside a 200 response body under keys like these -- confirm
        # against the API documentation.
        detail = "unexpected response format"
        if isinstance(data, dict):
            for key in ("Error Message", "Note", "Information"):
                if data.get(key):
                    detail = data[key]
                    break
        return None, f"API error: {_redact_key(detail)}"

    try:
        rate = float(rate_info["5. Exchange Rate"])
    except (KeyError, TypeError, ValueError) as e:
        return None, f"API error: {_redact_key(repr(e))}"
    return rate, None


def download_csv(df: pd.DataFrame, filename="cleaned.csv"):
    """Render a download button that serves *df* as a CSV file named *filename*.

    The frame is serialised without its index into an in-memory buffer.
    """
    buffer = BytesIO()
    df.to_csv(buffer, index=False)
    st.download_button(
        label="📥 Download Cleaned CSV",
        data=buffer.getvalue(),
        file_name=filename,
        mime="text/csv",
        use_container_width=True,
    )


# -------------------------------
# LAYOUT
# -------------------------------
tab1, tab2, tab3 = st.tabs(["📂 Upload CSV", "📊 Live Market Data", "📥 Download Clean Data"])

# -------------------------------
# TAB 1: Upload CSV
# -------------------------------
with tab1:
    st.subheader("Upload your dataset")
    uploaded = st.file_uploader("Upload CSV file", type=["csv"])
    if uploaded:
        # Only the parse is guarded: a failure further down is not a
        # "Failed to read CSV" problem and should not be reported as one.
        try:
            df = pd.read_csv(uploaded)
        except Exception as e:  # UI boundary: pandas raises several unrelated types here
            st.error(f"Failed to read CSV: {e}")
        else:
            st.success("File loaded successfully ✅")
            st.dataframe(df.head(), use_container_width=True)

            # The report describes the file as uploaded, before any quick fix.
            report = qa_report(df)
            for column, (label, value) in zip(st.columns(len(report)), report.items()):
                column.metric(label, value)

            # Every button click reruns the script, so a frame that is only
            # re-read from the upload would forget earlier fixes. Keep the
            # working copy in session state and reset it when the file changes.
            upload_id = (getattr(uploaded, "file_id", None), uploaded.name, uploaded.size)
            if st.session_state.get("cleaned_upload_id") != upload_id:
                st.session_state["cleaned_upload_id"] = upload_id
                st.session_state["cleaned_upload_df"] = df.copy()
            cleaned = st.session_state["cleaned_upload_df"]

            st.markdown("#### Quick fixes")
            colA, colB, colC = st.columns(3)
            with colA:
                if st.button("Remove duplicate rows"):
                    cleaned = cleaned.drop_duplicates()
                    st.info("Duplicates removed.")
            with colB:
                if st.button("Fill missing numeric with 0"):
                    cleaned = cleaned.copy()
                    num_cols = cleaned.select_dtypes(include=[np.number]).columns
                    if len(num_cols):
                        cleaned[num_cols] = cleaned[num_cols].fillna(0)
                    st.info("Missing numeric values filled with 0.")
            with colC:
                if st.button("Clamp negatives to 0 (numeric)"):
                    cleaned = cleaned.copy()
                    num_cols = cleaned.select_dtypes(include=[np.number]).columns
                    if len(num_cols):
                        cleaned[num_cols] = cleaned[num_cols].clip(lower=0)
                    st.info("Negative numeric values clamped to 0.")
            st.session_state["cleaned_upload_df"] = cleaned

            st.markdown("#### Preview after fixes")
            st.dataframe(cleaned.head(), use_container_width=True)
            download_csv(cleaned, "cleaned_upload.csv")

# -------------------------------
# TAB 2: Live Market Data
# -------------------------------
with tab2:
    st.subheader("Fetch & Validate Live Crypto Data (Alpha Vantage)")
    symbol = st.text_input("Crypto symbol", "BTC")
    market = st.text_input("Market currency", "USD")
    if st.button("Fetch rate"):
        rate, err = fetch_alpha_vantage(symbol, market)
        if err:
            st.error(err)
        elif rate is None:
            st.error("No rate returned. Check symbol/market or API limits.")
        else:
            st.success("Live rate fetched ✅")
            col1, col2 = st.columns(2)
            col1.metric(f"{symbol}/{market}", f"{rate:.4f}")
            # Simple QA: non-negative and reasonable range
            is_valid = 0 < rate < 1_000_000
            col2.metric("QA Valid", "Yes" if is_valid else "No")
            df_live = pd.DataFrame({"Symbol": [symbol], "Market": [market], "Rate": [rate]})
            st.dataframe(df_live, use_container_width=True)
            download_csv(df_live, "live_rate.csv")

# -------------------------------
# TAB 3: Download Clean Data (Sample)
# -------------------------------
with tab3:
    st.subheader("Demo cleaning on sample data")
    sample = pd.DataFrame({
        "id": [1, 2, 2, 3, 4],
        "value": [10, np.nan, 20, -5, 15],
        "note": ["ok", "missing", "ok", "neg", None],
    })
    st.write("Sample data (before):")
    st.dataframe(sample, use_container_width=True)

    # Same three fixes as the upload tab, applied in one pass: drop duplicate
    # rows, replace missing/negative values with 0, blank out missing notes.
    cleaned = (
        sample
        .drop_duplicates()
        .assign(
            value=lambda d: d["value"].fillna(0).clip(lower=0),
            note=lambda d: d["note"].fillna(""),
        )
    )
    st.write("Cleaned data (after):")
    st.dataframe(cleaned, use_container_width=True)
    download_csv(cleaned, "cleaned_sample.csv")

# -------------------------------
# FOOTER
# -------------------------------
st.markdown("---")
st.caption("Made for event showcases • Streamlit on Hugging Face Spaces • Alpha Vantage integration")