Spaces:
Sleeping
Sleeping
Update data_clean_final.py
Browse files - data_clean_final.py +6 -10
data_clean_final.py
CHANGED
|
@@ -15,7 +15,7 @@ llm = AutoModelForCausalLM.from_pretrained(
|
|
| 15 |
|
| 16 |
|
| 17 |
# Function to get data cleaning suggestions from LLM
|
| 18 |
-
def
|
| 19 |
|
| 20 |
examples_text = "\n".join([f"- {ex}" for ex in examples if ex])
|
| 21 |
|
|
@@ -26,9 +26,8 @@ def suggest_llm_fixes_and_fill(column_name, examples):
|
|
| 26 |
|
| 27 |
{examples_text}
|
| 28 |
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
No explanation or extra text — just the list.
|
| 32 |
"""
|
| 33 |
)
|
| 34 |
|
|
@@ -91,11 +90,8 @@ def clean_data(file_path):
|
|
| 91 |
examples = good_examples + bad_examples
|
| 92 |
|
| 93 |
if examples is not None:
|
| 94 |
-
llm_suggestion =
|
| 95 |
-
suggestions_log.append(
|
| 96 |
-
'col': col,
|
| 97 |
-
'suggestion': llm_suggestion
|
| 98 |
-
})
|
| 99 |
|
| 100 |
# Automatically apply replacements from LLM if in expected format
|
| 101 |
# if suggestions_log:
|
|
@@ -121,6 +117,6 @@ def display_llm_report(suggestions_log):
|
|
| 121 |
for col, suggestion in suggestions_log:
|
| 122 |
st.markdown(f"**Column:** `{col}`")
|
| 123 |
if suggestion:
|
| 124 |
-
|
| 125 |
else:
|
| 126 |
st.write("No suggestions or LLM response error.")
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
# Function to get data cleaning suggestions from LLM
|
| 18 |
+
def suggest_fill_strategies(column_name, examples):
|
| 19 |
|
| 20 |
examples_text = "\n".join([f"- {ex}" for ex in examples if ex])
|
| 21 |
|
|
|
|
| 26 |
|
| 27 |
{examples_text}
|
| 28 |
|
| 29 |
+
Generate a short, bulleted list of suggestions for how a data scientist might fill or impute missing values in this column.
|
| 30 |
+
Only return the bulleted list. Do not include any other text.
|
|
|
|
| 31 |
"""
|
| 32 |
)
|
| 33 |
|
|
|
|
| 90 |
examples = good_examples + bad_examples
|
| 91 |
|
| 92 |
if examples is not None:
|
| 93 |
+
llm_suggestion = suggest_fill_strategies(col, examples)
|
| 94 |
+
suggestions_log.append((col, llm_suggestion))
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
# Automatically apply replacements from LLM if in expected format
|
| 97 |
# if suggestions_log:
|
|
|
|
| 117 |
for col, suggestion in suggestions_log:
|
| 118 |
st.markdown(f"**Column:** `{col}`")
|
| 119 |
if suggestion:
|
| 120 |
+
st.code(suggestion, language="python")
|
| 121 |
else:
|
| 122 |
st.write("No suggestions or LLM response error.")
|