Spaces:

translators-will
/

Data-Cleaner

Sleeping

translators-will committed on Apr 9, 2025

Commit

ce50072

verified ·

1 Parent(s): 9aae495

Update data_clean_simple.py

Files changed (1) hide show

data_clean_simple.py CHANGED Viewed

@@ -53,6 +53,7 @@ def suggest_fill_strategies(column_name, examples):
 def clean_data(file_path):
     # Support CSV and TSV files
     # Load data and drop duplicates
     if file_path.endswith('.tsv'):
         df = pd.read_csv(file_path, sep='\t').drop_duplicates().copy()
     else:
@@ -61,7 +62,12 @@ def clean_data(file_path):
     suggestions_log = []
     # Convert column types
-    for col in df.columns:
         if df[col].dtype == 'object':
             df[col] = df[col].str.strip().str.lower()  # Normalize text
@@ -111,6 +117,12 @@ def clean_data(file_path):
                     'suggestion': suggestion
                 })
     df = df.reset_index(drop=True)
     return df, suggestions_log

 def clean_data(file_path):
     # Support CSV and TSV files
     # Load data and drop duplicates
+    # Clean data with progress updates
     if file_path.endswith('.tsv'):
         df = pd.read_csv(file_path, sep='\t').drop_duplicates().copy()
     else:
     suggestions_log = []
     # Convert column types
+    for i, col in enumerate(df.columns):
+        # Update progress if callback provided
+        if progress_callback:
+            progress = i / total_columns
+            progress_callback(progress)
         if df[col].dtype == 'object':
             df[col] = df[col].str.strip().str.lower()  # Normalize text
                     'suggestion': suggestion
                 })
+    # Final progress update
+    if progress_callback:
+        progress_callback(1.0)
+    # Reset index for consistency
     df = df.reset_index(drop=True)
     return df, suggestions_log