Spaces:

Haseeb-001
/

smart-data-cleaner

Sleeping

App Files Community

Haseeb-001 committed on Jan 28, 2025

Commit

605e4d8

verified ·

1 Parent(s): 26895e4

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -46

app.py CHANGED Viewed

@@ -27,7 +27,7 @@ def clean_with_llama(text):
 # Function to clean and process data
 def process_data(df):
-    # Remove unnecessary data like completely empty rows and columns
     df.dropna(how="all", inplace=True)
     df.dropna(axis=1, how="all", inplace=True)
@@ -80,54 +80,52 @@ def main():
     dataset_url = st.text_input("Paste the URL of the dataset")
     if uploaded_file or dataset_url:
         try:
-            if dataset_url:
-                st.info("Downloading dataset from URL...")
-                uploaded_file = download_dataset(dataset_url)
-            if uploaded_file:
-                st.success("File uploaded successfully!")
-                st.write("Processing the file...")
-                # Load dataset
-                if hasattr(uploaded_file, 'read'):  # Streamlit file upload
-                    df = pd.read_csv(uploaded_file)
-                else:  # File from URL download or local path
-                    with open(uploaded_file, 'r') as file:
-                        df = pd.read_csv(file)
-                st.write("**Original Dataset**")
-                st.dataframe(df)
-                # Process data
-                st.info("Cleaning and simplifying the dataset...")
-                df_cleaned = process_data(df)
-                # Display cleaned data
-                st.write("**Cleaned Dataset**")
-                st.dataframe(df_cleaned)
-                # Chunk data
-                st.info("Creating chunks for AI models...")
-                processed_files = chunk_dataset(df_cleaned)
-                # Allow download of processed chunks
-                st.success(f"Processing complete! {len(processed_files)} chunk(s) created.")
-                for file_name in processed_files:
-                    with open(file_name, 'rb') as file:
-                        st.download_button(
-                            label=f"Download {file_name}",
-                            data=file,
-                            file_name=file_name,
-                            mime="text/csv",
-                        )
-                # Cleanup generated files
-                for file_name in processed_files:
-                    os.remove(file_name)
         except Exception as e:
-            st.error(f"An error occurred: {e}")
 if __name__ == "__main__":
     main()

 # Function to clean and process data
 def process_data(df):
+    # Remove completely empty rows and columns
     df.dropna(how="all", inplace=True)
     df.dropna(axis=1, how="all", inplace=True)
     dataset_url = st.text_input("Paste the URL of the dataset")
     if uploaded_file or dataset_url:
+        if dataset_url:
+            st.info("Downloading dataset from URL...")
+            file_path = download_dataset(dataset_url)
+        else:
+            file_path = uploaded_file  # Use uploaded file
         try:
+            # Load dataset
+            if hasattr(file_path, 'read'):  # For Streamlit uploads
+                df = pd.read_csv(file_path)
+            else:  # For downloaded or local files
+                df = pd.read_csv(file_path)
+            st.success("File uploaded successfully!")
+            st.write("**Original Dataset**")
+            st.dataframe(df)
+            # Process data
+            st.info("Cleaning and simplifying the dataset...")
+            df_cleaned = process_data(df)
+            # Display cleaned data
+            st.write("**Cleaned Dataset**")
+            st.dataframe(df_cleaned)
+            # Chunk data
+            st.info("Creating chunks for AI models...")
+            processed_files = chunk_dataset(df_cleaned)
+            # Allow download of processed chunks
+            st.success(f"Processing complete! {len(processed_files)} chunk(s) created.")
+            for file_name in processed_files:
+                with open(file_name, 'rb') as file:
+                    st.download_button(
+                        label=f"Download {file_name}",
+                        data=file,
+                        file_name=file_name,
+                        mime="text/csv",
+                    )
+            # Cleanup generated files
+            for file_name in processed_files:
+                os.remove(file_name)
         except Exception as e:
+            st.error(f"An error occurred while processing the dataset: {e}")
 if __name__ == "__main__":
     main()