Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| import os | |
| from datetime import datetime | |
| from clean import clean_data, get_numeric_columns | |
| from report import create_full_report, REPORT_DIR | |
def clean_and_visualize(file, primary_key_column, progress=gr.Progress()):
    """Clean an uploaded CSV, generate the report images and save the result.

    Args:
        file: The uploaded CSV. Either an object exposing the file path as
            ``.name`` or a plain path string.
        primary_key_column: Column name forwarded to ``clean_data`` and
            ``create_full_report`` as the primary key.
        progress: Callable invoked as ``progress(fraction, desc=...)`` to
            report progress; it is also forwarded to ``clean_data``.

    Returns:
        A tuple ``(cleaned_csv_path, image_files)``: the path of the cleaned
        CSV written by this call, and the sorted paths of the ``.png`` files
        found in ``REPORT_DIR``.

    Raises:
        gr.Error: If no file was provided.
    """
    if file is None:
        # Surface a readable message in the UI instead of an AttributeError.
        raise gr.Error("Please upload a CSV file before starting the cleaning.")

    # Accept both file-like objects carrying the path in `.name` and bare paths.
    csv_path = getattr(file, "name", file)

    # Read the CSV file
    progress(0.05, desc="Reading CSV file")
    df = pd.read_csv(csv_path)

    # Clean the data
    progress(0.1, desc="Starting data cleaning")
    cleaned_df, nonconforming_cells_before, process_times = clean_data(
        df, primary_key_column, progress
    )
    progress(0.8, desc="Data cleaning completed")

    # Calculate removed columns and rows
    removed_columns = len(df.columns) - len(cleaned_df.columns)
    removed_rows = len(df) - len(cleaned_df)

    # Generate full visualization report
    progress(0.9, desc="Generating report")
    create_full_report(
        df,
        cleaned_df,
        nonconforming_cells_before,
        process_times,
        removed_columns,
        removed_rows,
        primary_key_column,
    )

    # Save cleaned data as CSV (written to the current working directory)
    progress(0.95, desc="Saving cleaned data")
    current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    cleaned_csv_path = f"cleaned_data_{current_time}.csv"
    cleaned_df.to_csv(cleaned_csv_path, index=False)

    # Collect all generated images. os.listdir returns entries in arbitrary
    # order, so sort to keep the gallery order stable between runs.
    # NOTE(review): this picks up every PNG in REPORT_DIR; presumably
    # create_full_report clears stale images from earlier runs - confirm.
    image_files = sorted(
        os.path.join(REPORT_DIR, f)
        for f in os.listdir(REPORT_DIR)
        if f.endswith(".png")
    )

    progress(1.0, desc="Process completed")
    return cleaned_csv_path, image_files
def launch_app():
    """Build the "Data Cleaner" Gradio interface and launch it.

    The UI consists of a CSV upload, a primary-key dropdown populated from the
    uploaded file's numeric columns, a button that starts the cleaning, a
    download slot for the cleaned CSV and a gallery (hidden until results are
    available) showing the generated report images.
    """
    with gr.Blocks() as app:
        gr.Markdown("# Data Cleaner")

        with gr.Row():
            file_input = gr.File(
                label="Upload CSV File", file_count="single", file_types=[".csv"]
            )
        with gr.Row():
            primary_key_dropdown = gr.Dropdown(
                label="Select Primary Key Column", choices=[], interactive=True
            )
        with gr.Row():
            clean_button = gr.Button("Start Cleaning")

        # gr.Progress is not a layout component, so nothing is placed here for
        # it; progress is tracked through the handler's `progress` parameter.

        with gr.Row():
            cleaned_file_output = gr.File(label="Cleaned CSV", visible=True)
        with gr.Row():
            output_gallery = gr.Gallery(
                label="Visualization Results",
                show_label=True,
                elem_id="gallery",
                columns=[3],
                rows=[3],
                object_fit="contain",
                height="auto",
                visible=False,
            )

        def update_primary_key_options(file):
            """Refresh the dropdown with the numeric columns of the upload."""
            # Reset the selection as well, so a column chosen for a previous
            # file cannot linger after the file is removed or replaced.
            if file is None:
                return gr.Dropdown(choices=[], value=None)
            df = pd.read_csv(getattr(file, "name", file))
            numeric_columns = get_numeric_columns(df)
            return gr.Dropdown(choices=numeric_columns, value=None)

        def process_and_show_results(file, primary_key_column, progress=gr.Progress()):
            """Run the cleaning pipeline and reveal the gallery with results.

            Declaring `progress` with a `gr.Progress()` default is how Gradio
            hands the handler a progress tracker bound to the running event.
            """
            cleaned_csv_path, image_files = clean_and_visualize(
                file, primary_key_column, progress=progress
            )
            return (
                cleaned_csv_path,
                gr.Gallery(visible=True, value=image_files),
            )

        file_input.change(
            fn=update_primary_key_options,
            inputs=file_input,
            outputs=primary_key_dropdown,
        )
        clean_button.click(
            fn=process_and_show_results,
            inputs=[file_input, primary_key_dropdown],
            outputs=[cleaned_file_output, output_gallery],
        )

    app.launch()
# Start the Gradio app only when this file is executed directly as a script.
if __name__ == "__main__":
    launch_app()