Spaces:

DocSrvNyk
/

SCCOFD_

No application file

App Files Files Community

DocSrvNyk committed on Sep 16, 2023

Commit

e38d45e

1 Parent(s): 38d7b01

Create app.py

Browse files

Files changed (1) hide show

app.py +63 -0

app.py ADDED Viewed

	@@ -0,0 +1,63 @@

+import os
+import tempfile
+import zipfile
+
+import gradio as gr
+import pandas as pd
+def process_csv(uploaded_file):
+    """Encode categorical columns, impute numeric gaps, and zip the results.
+
+    The uploaded CSV is transformed column by column:
+
+    1. Every text column, and every numeric column with fewer than six
+       unique non-missing values, is replaced by integer codes. A value's
+       code is its position in order of first appearance; missing values
+       are coded as ``-9999``.
+    2. Missing values in the remaining numeric columns are filled with the
+       column median.
+
+    Args:
+        uploaded_file: The upload handed over by Gradio, either an object
+            exposing the file path as ``.name`` or a plain path string.
+
+    Returns:
+        Path to ``processed_files.zip`` inside a freshly created temporary
+        directory. The archive holds ``modified_data.csv``, ``legend.csv``
+        (one row per encoded column with its value-to-code mapping) and
+        ``data_added_details.csv`` (one row per median-filled column).
+
+    Raises:
+        ValueError: If no file was uploaded.
+    """
+    if uploaded_file is None:
+        raise ValueError("No CSV file was uploaded.")
+
+    # Accept both a temp-file wrapper (path stored in ``.name``) and a bare path.
+    csv_path = getattr(uploaded_file, "name", uploaded_file)
+    data = pd.read_csv(csv_path)
+
+    legend_dict = {}  # column name -> {original value: code}
+    data_added_details = []  # [column, method, fill value] per imputed column
+
+    for col in data.columns:
+        series = data[col]
+        is_numeric = pd.api.types.is_numeric_dtype(series)
+        # Text may arrive as ``object`` or as a dedicated string dtype,
+        # depending on the pandas version and options in use.
+        is_text = (
+            pd.api.types.is_object_dtype(series)
+            or pd.api.types.is_string_dtype(series)
+        )
+
+        if is_text or (is_numeric and series.nunique() < 6):
+            # The missing-value entry keeps its slot in the enumeration but
+            # is emitted as -9999, so the other codes may skip one number.
+            mapping = {
+                value: code if pd.notna(value) else -9999
+                for code, value in enumerate(series.unique())
+            }
+            legend_dict[col] = mapping
+            data[col] = series.map(mapping)
+        elif is_numeric and series.isna().any():
+            median_value = series.median()
+            # Assign the result back: fillna(inplace=True) on ``data[col]``
+            # acts on an intermediate object and is not guaranteed to update
+            # ``data`` itself.
+            data[col] = series.fillna(median_value)
+            data_added_details.append([col, "Median", median_value])
+
+    legend_df = pd.DataFrame(list(legend_dict.items()), columns=['Column', 'Mapping'])
+    data_added_df = pd.DataFrame(
+        data_added_details, columns=['Column', 'Method', 'Value Added']
+    )
+
+    # Each call gets its own output directory so concurrent requests cannot
+    # overwrite one another's archive. The directory is not removed here.
+    zip_name = os.path.join(tempfile.mkdtemp(), "processed_files.zip")
+
+    # Write the CSV text straight into the archive; no loose intermediate
+    # files are left behind in the working directory.
+    with zipfile.ZipFile(zip_name, 'w') as zipf:
+        zipf.writestr("modified_data.csv", data.to_csv(index=False))
+        zipf.writestr("legend.csv", legend_df.to_csv(index=False))
+        zipf.writestr("data_added_details.csv", data_added_df.to_csv(index=False))
+    return zip_name
+# Gradio interface: one CSV upload in, the processed zip archive out.
+# gr.File replaces the deprecated gr.inputs.File / gr.outputs.File aliases,
+# which newer Gradio releases no longer provide. The explicit type="file"
+# argument is dropped so the component uses the installed version's default.
+# NOTE(review): confirm the default upload type of the pinned Gradio version
+# still exposes the path the way process_csv expects.
+iface = gr.Interface(
+    fn=process_csv,
+    inputs=gr.File(label="Upload CSV File"),
+    outputs=gr.File(label="Download Processed Files"),
+    live=False,
+)
+iface.launch()