Spaces:

Osnly
/

smart-data-cleaning-agent

Sleeping

Osnly committed on Jul 1, 2025

Commit

f38d4d9

verified ·

1 Parent(s): 1a8550a

Update src/visual_insight.py

Files changed (1) hide show

src/visual_insight.py CHANGED Viewed

@@ -1,14 +1,19 @@
-# visual_insight.py
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 import json
 import re
 import os
 model_id = "google/gemma-3n-E4B-it"
-hf_token = os.environ.get("HUGGINGFACE_TOKEN")
-tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
 model = AutoModelForCausalLM.from_pretrained(model_id, token=hf_token)
 def call_llm(prompt):
@@ -17,27 +22,8 @@ def call_llm(prompt):
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
 visual_prompt = """
-You are a data visualization expert. You will be given a summary of a cleaned dataset.
-Your tasks:
-1. Suggest 3–5 interesting visualizations that would help uncover patterns or relationships.
-2. For each, describe what insight it may reveal.
-3. For each, write Python code using pandas/seaborn/matplotlib to generate the plot. Use 'df' as the dataframe and be precise with column names.
-4. Always be careful and precise with column names
-Output JSON in this exact format:
-{
-  "visualizations": [
-    {
-      "title": "Histogram of Age",
-      "description": "Shows the distribution of age",
-      "code": "sns.histplot(df['age'], kde=True); plt.title('Age Distribution'); plt.savefig('charts/age.png'); plt.clf()"
-    },
-    ...
-  ]
-}
-Dataset Summary:
-{column_data}
 """
 def generate_visual_plan(column_data):

 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 import json
 import re
 import os
 model_id = "google/gemma-3n-E4B-it"
+# Set Hugging Face cache directory
+HF_CACHE_DIR = "./hf_cache"
+os.environ["HF_HOME"] = HF_CACHE_DIR
+os.environ["TRANSFORMERS_CACHE"] = HF_CACHE_DIR
+os.makedirs(HF_CACHE_DIR, exist_ok=True)
+hf_token = os.environ.get("HUGGINGFACE_TOKEN")
+tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token, use_auth_token=True)
 model = AutoModelForCausalLM.from_pretrained(model_id, token=hf_token)
 def call_llm(prompt):
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
 visual_prompt = """
+You are a data visualization expert...
+[TRUNCATED for brevity, use your full original template]
 """
 def generate_visual_plan(column_data):