Spaces:

Osnly
/

smart-data-cleaning-agent

Sleeping

App Files Community

Osnly committed on Jul 1, 2025

Commit

583755a

verified ·

1 Parent(s): 49436c8

Update src/visual_insight.py

Browse files

Files changed (1) hide show

src/visual_insight.py +55 -52

src/visual_insight.py CHANGED Viewed

@@ -1,52 +1,55 @@
-# visual_insight.py
-from transformers import AutoModelForCausalLM, AutoTokenizer
-import torch
-import json
-import re
-model_id = "google/gemma-3n-E4B-it"
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(model_id)
-def call_llm(prompt):
-    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
-    outputs = model.generate(**inputs, max_new_tokens=2048)
-    return tokenizer.decode(outputs[0], skip_special_tokens=True)
-visual_prompt = """
-You are a data visualization expert. You will be given a summary of a cleaned dataset.
-Your tasks:
-1. Suggest 3–5 interesting visualizations that would help uncover patterns or relationships.
-2. For each, describe what insight it may reveal.
-3. For each, write Python code using pandas/seaborn/matplotlib to generate the plot. Use 'df' as the dataframe and be precise with column names.
-4. Always be careful and precise with column names
-Output JSON in this exact format:
-{
-  "visualizations": [
-    {
-      "title": "Histogram of Age",
-      "description": "Shows the distribution of age",
-      "code": "sns.histplot(df['age'], kde=True); plt.title('Age Distribution'); plt.savefig('charts/age.png'); plt.clf()"
-    },
-    ...
-  ]
-}
-Dataset Summary:
-{column_data}
-"""
-def generate_visual_plan(column_data):
-    prompt = visual_prompt.format(column_data=json.dumps(column_data, indent=2))
-    response = call_llm(prompt)
-    match = re.search(r"\{.*\}", response, re.DOTALL)
-    if match:
-        try:
-            parsed = json.loads(match.group(0))
-            return parsed["visualizations"]
-        except:
-            print("Failed to parse visualization JSON.")
-            print(response)
-    return []

+# visual_insight.py
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+import json
+import re
+import os
+model_id = "google/gemma-3n-E4B-it"
+hf_token = os.environ.get("HUGGINGFACE_TOKEN")
+tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
+model = AutoModelForCausalLM.from_pretrained(model_id, token=hf_token)
+def call_llm(prompt):
+    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
+    outputs = model.generate(**inputs, max_new_tokens=2048)
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+visual_prompt = """
+You are a data visualization expert. You will be given a summary of a cleaned dataset.
+Your tasks:
+1. Suggest 3–5 interesting visualizations that would help uncover patterns or relationships.
+2. For each, describe what insight it may reveal.
+3. For each, write Python code using pandas/seaborn/matplotlib to generate the plot. Use 'df' as the dataframe and be precise with column names.
+4. Always be careful and precise with column names
+Output JSON in this exact format:
+{
+  "visualizations": [
+    {
+      "title": "Histogram of Age",
+      "description": "Shows the distribution of age",
+      "code": "sns.histplot(df['age'], kde=True); plt.title('Age Distribution'); plt.savefig('charts/age.png'); plt.clf()"
+    },
+    ...
+  ]
+}
+Dataset Summary:
+{column_data}
+"""
+def generate_visual_plan(column_data):
+    prompt = visual_prompt.format(column_data=json.dumps(column_data, indent=2))
+    response = call_llm(prompt)
+    match = re.search(r"\{.*\}", response, re.DOTALL)
+    if match:
+        try:
+            parsed = json.loads(match.group(0))
+            return parsed["visualizations"]
+        except:
+            print("Failed to parse visualization JSON.")
+            print(response)
+    return []