Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import requests | |
| import os | |
| from io import BytesIO | |
# -------------------------------
# PAGE CONFIG
# -------------------------------
# Page chrome: wide layout, title, and a one-line tagline.
st.set_page_config(
    page_title="🚀 Data QA Demo",
    layout="wide",
)
st.title("🚀 AI‑Powered Data Quality Checker")
st.markdown("Validate, clean, and showcase datasets in real‑time.")

# Alpha Vantage key, supplied through the environment (Hugging Face Secrets);
# None when the variable is not set.
ALPHA_KEY = os.environ.get("ALPHA_VANTAGE_API_KEY")
| # ------------------------------- | |
| # UTILITIES | |
| # ------------------------------- | |
def qa_report(df: pd.DataFrame):
    """Summarise basic data-quality counts for *df*.

    Returns a dict with the keys "Rows", "Columns", "Missing Values",
    "Duplicate Rows" and "Negative Values". Negative values are counted
    over the numeric columns only.
    """
    n_rows, n_cols = df.shape
    numeric_part = df.select_dtypes(include=[np.number])

    missing_cells = df.isnull().sum().sum()
    duplicate_rows = df.duplicated().sum()
    negative_cells = (numeric_part < 0).sum().sum()

    # Cast the pandas/NumPy scalars to plain ints for display.
    return {
        "Rows": n_rows,
        "Columns": n_cols,
        "Missing Values": int(missing_cells),
        "Duplicate Rows": int(duplicate_rows),
        "Negative Values": int(negative_cells),
    }
def fetch_alpha_vantage(symbol="BTC", market="USD"):
    """Fetch the realtime exchange rate of ``symbol`` quoted in ``market``.

    Queries Alpha Vantage's ``CURRENCY_EXCHANGE_RATE`` function using the
    module-level ``ALPHA_KEY``.

    Args:
        symbol: Currency code to convert from (surrounding whitespace is
            stripped).
        market: Currency code to convert to (surrounding whitespace is
            stripped).

    Returns:
        ``(rate, None)`` on success, where ``rate`` is a float, or
        ``(None, message)`` when the key is missing, the request fails, or
        the response does not contain a rate. This function does not raise
        for network or parsing problems.
    """
    if not ALPHA_KEY:
        return None, "ALPHA_VANTAGE_API_KEY not set. Add it in Settings → Secrets."

    def _redact(text):
        # Error text may echo the request URL or the key itself; never show
        # the secret in the UI.
        return str(text).replace(ALPHA_KEY, "***")

    # Let requests build the query string so user-typed values are
    # URL-encoded and cannot inject extra query parameters.
    params = {
        "function": "CURRENCY_EXCHANGE_RATE",
        "from_currency": str(symbol).strip(),
        "to_currency": str(market).strip(),
        "apikey": ALPHA_KEY,
    }
    try:
        r = requests.get(
            "https://www.alphavantage.co/query", params=params, timeout=15
        )
        r.raise_for_status()
        data = r.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding errors on older requests versions.
        return None, f"API error: {_redact(e)}"

    if not isinstance(data, dict):
        return None, "API error: unexpected response format."

    # Alpha Vantage reports problems (bad symbol, rate limiting) in-band with
    # HTTP 200; surface that text instead of a bare KeyError.
    # NOTE(review): key names taken from observed API responses — confirm.
    for notice_key in ("Error Message", "Note", "Information"):
        if notice_key in data:
            return None, f"API error: {_redact(data[notice_key])}"

    try:
        rate = float(data["Realtime Currency Exchange Rate"]["5. Exchange Rate"])
    except (KeyError, TypeError, ValueError):
        return None, "API error: response did not contain an exchange rate."
    return rate, None
def download_csv(df: pd.DataFrame, filename="cleaned.csv"):
    """Render a Streamlit download button that serves *df* as a CSV file.

    The frame is serialised without its index into an in-memory buffer and
    offered for download under ``filename``.
    """
    csv_buffer = BytesIO()
    df.to_csv(csv_buffer, index=False)
    csv_bytes = csv_buffer.getvalue()

    st.download_button(
        label="📥 Download Cleaned CSV",
        data=csv_bytes,
        file_name=filename,
        mime="text/csv",
        use_container_width=True,
    )
# -------------------------------
# LAYOUT
# -------------------------------
# One tab per workflow: user upload, live API lookup, built-in sample demo.
TAB_LABELS = [
    "📂 Upload CSV",
    "📊 Live Market Data",
    "📥 Download Clean Data",
]
tab1, tab2, tab3 = st.tabs(TAB_LABELS)
# -------------------------------
# TAB 1: Upload CSV
# -------------------------------
with tab1:
    st.subheader("Upload your dataset")
    uploaded = st.file_uploader("Upload CSV file", type=["csv"])
    if uploaded:
        # Streamlit reruns the whole script on every widget interaction, and
        # st.button is only True for the single rerun right after its click.
        # Re-reading the CSV each run would therefore discard earlier fixes,
        # so the parsed frame and the working copy live in session_state and
        # are only replaced when a different file is uploaded.
        upload_token = (
            getattr(uploaded, "file_id", None),
            uploaded.name,
            uploaded.size,
        )
        if st.session_state.get("tab1_token") != upload_token:
            try:
                uploaded.seek(0)
                parsed = pd.read_csv(uploaded)
            except Exception as e:  # UI boundary: report any parse failure
                for stale_key in ("tab1_token", "tab1_raw", "tab1_clean"):
                    st.session_state.pop(stale_key, None)
                st.error(f"Failed to read CSV: {e}")
            else:
                st.session_state["tab1_token"] = upload_token
                st.session_state["tab1_raw"] = parsed
                st.session_state["tab1_clean"] = parsed.copy()

        if st.session_state.get("tab1_token") == upload_token:
            raw_df = st.session_state["tab1_raw"]
            df = st.session_state["tab1_clean"]

            st.success("File loaded successfully ✅")
            st.dataframe(raw_df.head(), use_container_width=True)

            # The QA metrics describe the file as uploaded (before fixes).
            report = qa_report(raw_df)
            c1, c2, c3, c4, c5 = st.columns(5)
            c1.metric("Rows", report["Rows"])
            c2.metric("Columns", report["Columns"])
            c3.metric("Missing Values", report["Missing Values"])
            c4.metric("Duplicate Rows", report["Duplicate Rows"])
            c5.metric("Negative Values", report["Negative Values"])

            st.markdown("#### Quick fixes")
            colA, colB, colC = st.columns(3)
            with colA:
                if st.button("Remove duplicate rows"):
                    df = df.drop_duplicates()
                    st.info("Duplicates removed.")
            with colB:
                if st.button("Fill missing numeric with 0"):
                    # Work on a copy so the column assignment never targets
                    # a slice of a previous frame.
                    df = df.copy()
                    num_cols = df.select_dtypes(include=[np.number]).columns
                    df[num_cols] = df[num_cols].fillna(0)
                    st.info("Missing numeric values filled with 0.")
            with colC:
                if st.button("Clamp negatives to 0 (numeric)"):
                    df = df.copy()
                    num_cols = df.select_dtypes(include=[np.number]).columns
                    df[num_cols] = df[num_cols].clip(lower=0)
                    st.info("Negative numeric values clamped to 0.")

            # Persist the result so the next click builds on it.
            st.session_state["tab1_clean"] = df

            st.markdown("#### Preview after fixes")
            st.dataframe(df.head(), use_container_width=True)
            download_csv(df, "cleaned_upload.csv")
# -------------------------------
# TAB 2: Live Market Data
# -------------------------------
with tab2:
    st.subheader("Fetch & Validate Live Crypto Data (Alpha Vantage)")
    symbol = st.text_input("Crypto symbol", "BTC")
    market = st.text_input("Market currency", "USD")

    # st.button is only True on the rerun right after its click, and clicking
    # the download button below triggers another rerun. The last successful
    # quote is kept in session_state so the result (and its download button)
    # stay on screen instead of vanishing.
    if st.button("Fetch rate"):
        rate, err = fetch_alpha_vantage(symbol, market)
        if err:
            st.session_state.pop("tab2_quote", None)
            st.error(err)
        elif rate is None:
            st.session_state.pop("tab2_quote", None)
            st.error("No rate returned. Check symbol/market or API limits.")
        else:
            st.session_state["tab2_quote"] = {
                "symbol": symbol,
                "market": market,
                "rate": rate,
            }
            st.success("Live rate fetched ✅")

    quote = st.session_state.get("tab2_quote")
    if quote is not None:
        # Label with the pair that was actually fetched, not whatever is
        # currently typed in the inputs.
        q_symbol, q_market, q_rate = quote["symbol"], quote["market"], quote["rate"]

        col1, col2 = st.columns(2)
        col1.metric(f"{q_symbol}/{q_market}", f"{q_rate:.4f}")

        # Simple QA: strictly positive and below a sanity ceiling.
        is_valid = 0 < q_rate < 1_000_000
        col2.metric("QA Valid", "Yes" if is_valid else "No")

        df_live = pd.DataFrame(
            {"Symbol": [q_symbol], "Market": [q_market], "Rate": [q_rate]}
        )
        st.dataframe(df_live, use_container_width=True)
        download_csv(df_live, "live_rate.csv")
# -------------------------------
# TAB 3: Download Clean Data (Sample)
# -------------------------------
with tab3:
    st.subheader("Demo cleaning on sample data")

    # Small built-in frame containing a missing value, a negative value,
    # a repeated id and a missing note.
    sample = pd.DataFrame(
        {
            "id": [1, 2, 2, 3, 4],
            "value": [10, np.nan, 20, -5, 15],
            "note": ["ok", "missing", "ok", "neg", None],
        }
    )
    st.write("Sample data (before):")
    st.dataframe(sample, use_container_width=True)

    # Same pipeline, spelled out step by step: drop fully duplicated rows,
    # zero-fill then clamp "value", and blank out missing notes.
    deduped = sample.drop_duplicates()
    value_fixed = deduped["value"].fillna(0).clip(lower=0)
    note_fixed = deduped["note"].fillna("")
    cleaned = deduped.assign(value=value_fixed, note=note_fixed)

    st.write("Cleaned data (after):")
    st.dataframe(cleaned, use_container_width=True)
    download_csv(cleaned, "cleaned_sample.csv")
# -------------------------------
# FOOTER
# -------------------------------
# Horizontal rule followed by a small attribution line.
FOOTER_TEXT = (
    "Made for event showcases • Streamlit on Hugging Face Spaces • Alpha Vantage integration"
)
st.markdown("---")
st.caption(FOOTER_TEXT)