translators-will committed on
Commit
ce50072
·
verified ·
1 Parent(s): 9aae495

Update data_clean_simple.py

Browse files
Files changed (1) hide show
  1. data_clean_simple.py +13 -1
data_clean_simple.py CHANGED
@@ -53,6 +53,7 @@ def suggest_fill_strategies(column_name, examples):
53
  def clean_data(file_path):
54
  # Support CSV and TSV files
55
  # Load data and drop duplicates
 
56
  if file_path.endswith('.tsv'):
57
  df = pd.read_csv(file_path, sep='\t').drop_duplicates().copy()
58
  else:
@@ -61,7 +62,12 @@ def clean_data(file_path):
61
  suggestions_log = []
62
 
63
  # Convert column types
64
- for col in df.columns:
 
 
 
 
 
65
  if df[col].dtype == 'object':
66
  df[col] = df[col].str.strip().str.lower() # Normalize text
67
 
@@ -111,6 +117,12 @@ def clean_data(file_path):
111
  'suggestion': suggestion
112
  })
113
 
 
 
 
 
 
 
114
  df = df.reset_index(drop=True)
115
 
116
  return df, suggestions_log
 
53
  def clean_data(file_path):
54
  # Support CSV and TSV files
55
  # Load data and drop duplicates
56
+ # Clean data with progress updates
57
  if file_path.endswith('.tsv'):
58
  df = pd.read_csv(file_path, sep='\t').drop_duplicates().copy()
59
  else:
 
62
  suggestions_log = []
63
 
64
  # Convert column types
65
+ for i, col in enumerate(df.columns):
66
+ # Update progress if callback provided
67
+ if progress_callback:
68
+ progress = i / total_columns
69
+ progress_callback(progress)
70
+
71
  if df[col].dtype == 'object':
72
  df[col] = df[col].str.strip().str.lower() # Normalize text
73
 
 
117
  'suggestion': suggestion
118
  })
119
 
120
+ # Final progress update
121
+ if progress_callback:
122
+ progress_callback(1.0)
123
+
124
+ # Reset index for consistency
125
+
126
  df = df.reset_index(drop=True)
127
 
128
  return df, suggestions_log