# NOTE(review): the three lines below were a scraped Hugging Face "Spaces:
# Sleeping" status banner (page residue, not source code); preserved here
# as a comment so the file remains valid Python.
"""Streamlit front-end for the Smart Data Cleaning Agent.

Pipeline: upload CSV -> analyze -> LLM cleaning plan -> apply plan ->
EDA insights -> LLM-recommended plots -> optional PDF report.
All intermediate artifacts live under /tmp (the only writable path on
Hugging Face Spaces containers).
"""
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

from analyze import analyze_csv
from plan import generate_cleaning_plan
from execute import execute_plan
from insight import generate_insights
from visual_insight import generate_visual_plan
from report import ReportBuilder
from transformers import AutoProcessor, AutoModelForImageTextToText
from transformers import AutoTokenizer

# Temp-safe paths — Spaces containers only guarantee write access under /tmp.
input_path = "/tmp/input.csv"
output_path = "/tmp/output.csv"
report_path = "/tmp/final_report.pdf"
charts_dir = "/tmp/charts"
os.makedirs(charts_dir, exist_ok=True)

# Hugging Face auth/cache settings.
# NOTE(review): hf_token and cache_dir are defined but never used in this
# file — presumably consumed by the imported modules or left over from a
# model-access check; verify before removing.
hf_token = os.environ.get("HUGGINGFACE_TOKEN")
cache_dir = "/tmp/hf_cache"

st.set_page_config(page_title="Smart Data Cleaning Agent", layout="wide")
st.title("🧠 Smart Data Cleaning Agent")

uploaded_file = st.file_uploader("📁 Upload a CSV file", type=["csv"])

if uploaded_file:
    # Persist the upload to /tmp so downstream steps can re-read it by path.
    with open(input_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    df = pd.read_csv(input_path)

    st.subheader("📊 Original Data Preview")
    st.dataframe(df.head())

    with st.spinner("🔍 Analyzing CSV..."):
        analysis = analyze_csv(input_path)

    with st.spinner("🧼 Generating Cleaning Plan..."):
        cleaning_plan, cleaning_summary = generate_cleaning_plan(analysis)
    st.subheader("🧹 Cleaning Plan")
    st.json(cleaning_plan)
    st.markdown("### ✅ Cleaning Summary")
    st.markdown(cleaning_summary)

    with st.spinner("🧪 Applying cleaning..."):
        cleaned_df = execute_plan(df.copy(), cleaning_plan)
        cleaned_df.to_csv(output_path, index=False)
    st.subheader("🧼 Cleaned Data Preview")
    st.dataframe(cleaned_df.head())
    st.download_button(
        "⬇️ Download Cleaned CSV",
        cleaned_df.to_csv(index=False),
        file_name="cleaned.csv",
    )

    with st.spinner("🧠 Deriving insights..."):
        insights = generate_insights(analysis["columns"])
    st.subheader("📈 EDA Insights")
    st.text(insights)

    with st.spinner("📊 Generating recommended plots..."):
        visuals = generate_visual_plan(analysis["columns"])
    for vis in visuals:
        st.markdown(f"#### {vis['title']}")
        st.markdown(vis["description"])
        try:
            # Redirect any relative chart paths the model emitted into /tmp.
            safe_code = vis["code"].replace("charts/", f"{charts_dir}/")
            # SECURITY: exec() of model-generated code is inherently unsafe —
            # the restricted globals dict limits what names are reachable but
            # does NOT sandbox execution. Acceptable only because the code is
            # produced by this app's own planner, never by the end user.
            exec(safe_code, {"df": cleaned_df, "plt": plt, "sns": sns, "os": os})
            st.pyplot(plt.gcf())
            plt.clf()  # reset the shared figure so plots don't stack up
        except Exception as e:
            st.error(f"❌ Failed to render: {e}")

    if st.button("📄 Generate PDF Report"):
        report = ReportBuilder(output_path=report_path)
        report.add_title("📊 Smart Data Cleaning Report")
        report.add_section("Cleaning Summary", cleaning_summary)
        report.add_section("EDA Insights", insights)
        for vis in visuals:
            # Recover the chart file path from the generated savefig() call
            # so the saved image can be embedded in the PDF.
            if "savefig('" in vis["code"]:
                path = vis["code"].split("savefig('")[-1].split("')")[0]
                if not path.startswith("/"):
                    # Relative path: the chart was redirected into charts_dir.
                    path = os.path.join(charts_dir, os.path.basename(path))
                report.add_plot(path, vis["description"])
        report.save()
        with open(report_path, "rb") as f:
            st.download_button(
                "⬇️ Download PDF Report", f, file_name="smart_data_report.pdf"
            )