# data-qa-demo / app.py
# POKO44's picture
# Create app.py
# bc468b3 verified
import streamlit as st
import pandas as pd
import numpy as np
import requests
import os
from io import BytesIO
# -------------------------------
# PAGE CONFIG
# -------------------------------
# Page-level settings: title "🚀 Data QA Demo" and the "wide" layout.
st.set_page_config(page_title="🚀 Data QA Demo", layout="wide")
# Visible page heading followed by a one-line description of the app.
st.title("🚀 AI‑Powered Data Quality Checker")
st.markdown("Validate, clean, and showcase datasets in real‑time.")
# Read Alpha Vantage key from Hugging Face Secrets
# os.getenv returns None when the variable is unset; fetch_alpha_vantage
# checks for a falsy key before issuing any request.
ALPHA_KEY = os.getenv("ALPHA_VANTAGE_API_KEY")
# -------------------------------
# UTILITIES
# -------------------------------
def qa_report(df: pd.DataFrame):
    """Summarise basic data-quality counts for *df*.

    Returns a dict with the keys "Rows", "Columns", "Missing Values",
    "Duplicate Rows" and "Negative Values" (in that order). Negative values
    are counted over numeric columns only.
    """
    n_rows, n_cols = df.shape

    # Each count is a frame-wide total: per-column sums folded into one number.
    missing_cells = df.isnull().sum().sum()
    repeated_rows = df.duplicated().sum()
    numeric_part = df.select_dtypes(include=[np.number])
    negative_cells = (numeric_part < 0).sum().sum()

    return {
        "Rows": n_rows,
        "Columns": n_cols,
        "Missing Values": int(missing_cells),
        "Duplicate Rows": int(repeated_rows),
        "Negative Values": int(negative_cells),
    }
def fetch_alpha_vantage(symbol="BTC", market="USD"):
    """Fetch the realtime exchange rate ``symbol`` -> ``market`` from Alpha Vantage.

    Args:
        symbol: Currency code to convert from (e.g. "BTC").
        market: Currency code to convert to (e.g. "USD").

    Returns:
        A ``(rate, error)`` tuple: ``(float, None)`` on success, or
        ``(None, message)`` when the key is missing, the request fails, or
        the response carries no usable rate. The function does not raise for
        network/HTTP/parse failures; those are reported through ``message``.
    """
    if not ALPHA_KEY:
        return None, "ALPHA_VANTAGE_API_KEY not set. Add it in Settings → Secrets."

    def _redact(text):
        # Exception text from requests embeds the full URL (including
        # ``apikey=``), and the API's own notices can echo the key back.
        # Messages returned here are shown to the user, so mask the secret.
        return str(text).replace(ALPHA_KEY, "***")

    # Pass the query as ``params`` so requests URL-encodes the user-typed
    # values instead of splicing them raw into the URL.
    params = {
        "function": "CURRENCY_EXCHANGE_RATE",
        "from_currency": str(symbol).strip(),
        "to_currency": str(market).strip(),
        "apikey": ALPHA_KEY,
    }
    try:
        r = requests.get("https://www.alphavantage.co/query", params=params, timeout=15)
        r.raise_for_status()
        data = r.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        return None, f"API error: {_redact(e)}"

    if not isinstance(data, dict):
        return None, "API error: unexpected response format"

    payload = data.get("Realtime Currency Exchange Rate")
    if not payload:
        # The service answers HTTP 200 even for bad symbols or rate limits,
        # putting the explanation under one of these keys instead.
        for notice_key in ("Error Message", "Note", "Information"):
            notice = data.get(notice_key)
            if notice:
                return None, f"API error: {_redact(notice)}"
        return None, "API error: response did not contain an exchange rate"

    try:
        rate = float(payload["5. Exchange Rate"])
    except (KeyError, TypeError, ValueError) as e:
        return None, f"API error: {_redact(e)}"
    return rate, None
def download_csv(df: pd.DataFrame, filename="cleaned.csv"):
    """Render a Streamlit download button that serves *df* as a CSV file.

    The frame is written without its index into an in-memory byte stream,
    and the resulting bytes are offered for download under *filename*.
    """
    stream = BytesIO()
    df.to_csv(stream, index=False)
    csv_bytes = stream.getvalue()

    button_options = {
        "label": "📥 Download Cleaned CSV",
        "data": csv_bytes,
        "file_name": filename,
        "mime": "text/csv",
        "use_container_width": True,
    }
    st.download_button(**button_options)
# -------------------------------
# LAYOUT
# -------------------------------
# Three top-level tabs; each one is filled by its own `with tabN:` section
# further down the script.
tab1, tab2, tab3 = st.tabs(["📂 Upload CSV", "📊 Live Market Data", "📥 Download Clean Data"])
# -------------------------------
# TAB 1: Upload CSV
# -------------------------------
with tab1:
    st.subheader("Upload your dataset")
    uploaded = st.file_uploader("Upload CSV file", type=["csv"])
    if uploaded:
        # Guard only the parse step, so "Failed to read CSV" is never shown
        # for an unrelated error raised later while rendering or cleaning.
        try:
            df = pd.read_csv(uploaded)
        except Exception as e:
            st.error(f"Failed to read CSV: {e}")
        else:
            st.success("File loaded successfully ✅")
            st.dataframe(df.head(), use_container_width=True)

            # One metric tile per entry of the QA report, in report order.
            report = qa_report(df)
            for tile, (label, value) in zip(st.columns(len(report)), report.items()):
                tile.metric(label, value)

            st.markdown("#### Quick fixes")
            # Checkboxes rather than buttons: a button is True only during the
            # rerun caused by its own click, and the frame is re-read from the
            # upload on every rerun, so button-driven fixes could never be
            # combined. A checkbox keeps its state across reruns, so every
            # ticked fix is re-applied each time and the fixes accumulate.
            colA, colB, colC = st.columns(3)
            with colA:
                if st.checkbox("Remove duplicate rows", key="fix_remove_duplicates"):
                    df = df.drop_duplicates()
                    st.info("Duplicates removed.")
            with colB:
                if st.checkbox("Fill missing numeric with 0", key="fix_fill_missing"):
                    num_cols = df.select_dtypes(include=[np.number]).columns
                    if not num_cols.empty:
                        df[num_cols] = df[num_cols].fillna(0)
                    st.info("Missing numeric values filled with 0.")
            with colC:
                if st.checkbox("Clamp negatives to 0 (numeric)", key="fix_clamp_negatives"):
                    num_cols = df.select_dtypes(include=[np.number]).columns
                    if not num_cols.empty:
                        df[num_cols] = df[num_cols].clip(lower=0)
                    st.info("Negative numeric values clamped to 0.")

            st.markdown("#### Preview after fixes")
            st.dataframe(df.head(), use_container_width=True)
            download_csv(df, "cleaned_upload.csv")
# -------------------------------
# TAB 2: Live Market Data
# -------------------------------
with tab2:
    st.subheader("Fetch & Validate Live Crypto Data (Alpha Vantage)")
    symbol = st.text_input("Crypto symbol", "BTC")
    market = st.text_input("Market currency", "USD")

    if st.button("Fetch rate"):
        rate, err = fetch_alpha_vantage(symbol, market)
        if err:
            # The fetch helper reports failures as a message, not an exception.
            st.error(err)
        elif rate is None:
            st.error("No rate returned. Check symbol/market or API limits.")
        else:
            st.success("Live rate fetched ✅")
            rate_tile, qa_tile = st.columns(2)
            rate_tile.metric(f"{symbol}/{market}", f"{rate:.4f}")

            # Simple QA: strictly positive and below one million.
            if 0 < rate < 1_000_000:
                qa_label = "Yes"
            else:
                qa_label = "No"
            qa_tile.metric("QA Valid", qa_label)

            # Single-row frame so the rate can be shown and downloaded as CSV.
            df_live = pd.DataFrame(
                {
                    "Symbol": [symbol],
                    "Market": [market],
                    "Rate": [rate],
                }
            )
            st.dataframe(df_live, use_container_width=True)
            download_csv(df_live, "live_rate.csv")
# -------------------------------
# TAB 3: Download Clean Data (Sample)
# -------------------------------
with tab3:
    st.subheader("Demo cleaning on sample data")

    # Small hand-made frame with a repeated id, a missing number, a negative
    # number and a missing note.
    sample = pd.DataFrame(
        {
            "id": [1, 2, 2, 3, 4],
            "value": [10, np.nan, 20, -5, 15],
            "note": ["ok", "missing", "ok", "neg", None],
        }
    )
    st.write("Sample data (before):")
    st.dataframe(sample, use_container_width=True)

    # Same pipeline as a chained expression, spelled out step by step:
    # drop fully identical rows, then zero-fill and floor "value" at 0, and
    # blank out missing notes.
    deduplicated = sample.drop_duplicates()
    fixed_value = deduplicated["value"].fillna(0).clip(lower=0)
    fixed_note = deduplicated["note"].fillna("")
    cleaned = deduplicated.assign(value=fixed_value, note=fixed_note)

    st.write("Cleaned data (after):")
    st.dataframe(cleaned, use_container_width=True)
    download_csv(cleaned, "cleaned_sample.csv")
# -------------------------------
# FOOTER
# -------------------------------
# Markdown horizontal rule followed by a small caption line; both are emitted
# at top level, outside the `with tabN:` sections.
st.markdown("---")
st.caption("Made for event showcases • Streamlit on Hugging Face Spaces • Alpha Vantage integration")