Spaces:

translators-will
/

LLM-Data-Cleaner

Sleeping

translators-will committed on Apr 9, 2025

Commit

0e4b101

verified ·

1 Parent(s): 46c89b6

Update data_clean_final.py

Files changed (1) hide show

data_clean_final.py CHANGED Viewed

@@ -15,7 +15,7 @@ llm = AutoModelForCausalLM.from_pretrained(
 # Function to get data cleaning suggestions from LLM
-def suggest_llm_fixes_and_fill(column_name, examples):
     examples_text = "\n".join([f"- {ex}" for ex in examples if ex])
@@ -26,9 +26,8 @@ def suggest_llm_fixes_and_fill(column_name, examples):
         {examples_text}
-        Return ONLY a valid Python list of tuples, like:
-        [("original_value1", "replacement1", "reason"), ("original_value2", "replacement2", "reason"), ...]
-        No explanation or extra text — just the list.
         """
     )
@@ -91,11 +90,8 @@ def clean_data(file_path):
             examples = good_examples + bad_examples
             if examples is not None:
-                llm_suggestion = suggest_llm_fixes_and_fill(col, examples)
-                suggestions_log.append({
-                    'col': col,
-                    'suggestion': llm_suggestion
-                })
                  # Automatically apply replacements from LLM if in expected format
                 # if suggestions_log:
@@ -121,6 +117,6 @@ def display_llm_report(suggestions_log):
         for col, suggestion in suggestions_log:
             st.markdown(f"**Column:** `{col}`")
             if suggestion:
-                    st.code(suggestion, language="python")
             else:
                 st.write("No suggestions or LLM response error.")

 # Function to get data cleaning suggestions from LLM
+def suggest_fill_strategies(column_name, examples):
     examples_text = "\n".join([f"- {ex}" for ex in examples if ex])
         {examples_text}
+        Generate a short, bulleted list of suggestions for how a data scientist might fill or impute missing values in this column.
+        Only return the bulleted list. Do not include any other text.
         """
     )
             examples = good_examples + bad_examples
             if examples is not None:
+                llm_suggestion = suggest_fill_strategies(col, examples)
+                suggestions_log.append((col, llm_suggestion))
                  # Automatically apply replacements from LLM if in expected format
                 # if suggestions_log:
         for col, suggestion in suggestions_log:
             st.markdown(f"**Column:** `{col}`")
             if suggestion:
+                st.code(suggestion, language="python")
             else:
                 st.write("No suggestions or LLM response error.")