translators-will committed on
Commit
0e4b101
·
verified ·
1 Parent(s): 46c89b6

Update data_clean_final.py

Browse files
Files changed (1) hide show
  1. data_clean_final.py +6 -10
data_clean_final.py CHANGED
@@ -15,7 +15,7 @@ llm = AutoModelForCausalLM.from_pretrained(
15
 
16
 
17
  # Function to get data cleaning suggestions from LLM
18
- def suggest_llm_fixes_and_fill(column_name, examples):
19
 
20
  examples_text = "\n".join([f"- {ex}" for ex in examples if ex])
21
 
@@ -26,9 +26,8 @@ def suggest_llm_fixes_and_fill(column_name, examples):
26
 
27
  {examples_text}
28
 
29
- Return ONLY a valid Python list of tuples, like:
30
- [("original_value1", "replacement1", "reason"), ("original_value2", "replacement2", "reason"), ...]
31
- No explanation or extra text — just the list.
32
  """
33
  )
34
 
@@ -91,11 +90,8 @@ def clean_data(file_path):
91
  examples = good_examples + bad_examples
92
 
93
  if examples is not None:
94
- llm_suggestion = suggest_llm_fixes_and_fill(col, examples)
95
- suggestions_log.append({
96
- 'col': col,
97
- 'suggestion': llm_suggestion
98
- })
99
 
100
  # Automatically apply replacements from LLM if in expected format
101
  # if suggestions_log:
@@ -121,6 +117,6 @@ def display_llm_report(suggestions_log):
121
  for col, suggestion in suggestions_log:
122
  st.markdown(f"**Column:** `{col}`")
123
  if suggestion:
124
- st.code(suggestion, language="python")
125
  else:
126
  st.write("No suggestions or LLM response error.")
 
15
 
16
 
17
  # Function to get data cleaning suggestions from LLM
18
+ def suggest_fill_strategies(column_name, examples):
19
 
20
  examples_text = "\n".join([f"- {ex}" for ex in examples if ex])
21
 
 
26
 
27
  {examples_text}
28
 
29
+ Generate a short, bulleted list of suggestions for how a data scientist might fill or impute missing values in this column.
30
+ Only return the bulleted list. Do not include any other text.
 
31
  """
32
  )
33
 
 
90
  examples = good_examples + bad_examples
91
 
92
  if examples is not None:
93
+ llm_suggestion = suggest_fill_strategies(col, examples)
94
+ suggestions_log.append((col, llm_suggestion))
 
 
 
95
 
96
  # Automatically apply replacements from LLM if in expected format
97
  # if suggestions_log:
 
117
  for col, suggestion in suggestions_log:
118
  st.markdown(f"**Column:** `{col}`")
119
  if suggestion:
120
+ st.code(suggestion, language="python")
121
  else:
122
  st.write("No suggestions or LLM response error.")