AIDataAgentProjectFinal

Paused

App Files Community

pavanmutha committed on Apr 18, 2025

Commit

a175c5f

verified ·

1 Parent(s): fcdbea4

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -10

app.py CHANGED Viewed

@@ -132,16 +132,16 @@ def analyze_data(csv_file, additional_notes=""):
     process = psutil.Process(os.getpid())
     initial_memory = process.memory_info().rss / 1024 ** 2
-    # Load and clean the data BEFORE passing to the agent
     try:
         df = pd.read_csv(csv_file)
         df = clean_data(df)
     except Exception as e:
         return f"<p style='color:red'><b>Error loading or cleaning CSV:</b> {e}</p>", []
-    # Save cleaned data to a temporary file
-    tmp_cleaned = tempfile.NamedTemporaryFile(delete=False, suffix=".csv", mode='w')
-    df.to_csv(tmp_cleaned.name, index=False)
     # Clear or create figures folder
     if os.path.exists('./figures'):
@@ -153,17 +153,19 @@ def analyze_data(csv_file, additional_notes=""):
     run = wandb.init(project="huggingface-data-analysis", config={
         "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
         "additional_notes": additional_notes,
-        "source_file": tmp_cleaned.name
     })
-    # Initialize agent
     agent = CodeAgent(
         tools=[],
         model=model,
-        additional_authorized_imports=["numpy", "pandas", "matplotlib.pyplot", "seaborn", "sklearn", "json"]
     )
-    # Run the agent on the cleaned file
     raw_output = agent.run("""
         You are a data analysis agent. Follow these instructions EXACTLY:
         1. Load the data from the given `source_file` ONLY. DO NOT create your OWN DATA.
@@ -171,7 +173,7 @@ def analyze_data(csv_file, additional_notes=""):
         3. Save all figures to `./figures` as PNG using matplotlib or seaborn.
         4. Use only authorized imports: `pandas`, `numpy`, `matplotlib.pyplot`, `seaborn`, `json`.
         5. DO NOT return any explanations, thoughts, or narration outside the final JSON block
-        6. Run agent efficiently and remove repetitive task and complete in less than 40 seconds.
         7. Output ONLY the following JSON code block format, exactly:
         {
             'observations': {
@@ -183,7 +185,10 @@ def analyze_data(csv_file, additional_notes=""):
                 ...
             }
         }
-    """, additional_args={"additional_notes": additional_notes, "source_file": tmp_cleaned})
     # Parse output
     parsed_result = extract_json_from_codeagent_output(raw_output) or {

     process = psutil.Process(os.getpid())
     initial_memory = process.memory_info().rss / 1024 ** 2
+    # Clean the uploaded CSV file
     try:
         df = pd.read_csv(csv_file)
         df = clean_data(df)
     except Exception as e:
         return f"<p style='color:red'><b>Error loading or cleaning CSV:</b> {e}</p>", []
+    # Save cleaned CSV to disk (using a stable location)
+    cleaned_csv_path = "./cleaned_data.csv"
+    df.to_csv(cleaned_csv_path, index=False)
     # Clear or create figures folder
     if os.path.exists('./figures'):
     run = wandb.init(project="huggingface-data-analysis", config={
         "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
         "additional_notes": additional_notes,
+        "source_file": cleaned_csv_path
     })
+    # CodeAgent instance
     agent = CodeAgent(
         tools=[],
         model=model,
+        additional_authorized_imports=[
+            "numpy", "pandas", "matplotlib.pyplot", "seaborn", "sklearn", "json"
+        ]
     )
+    # Run agent on cleaned CSV
     raw_output = agent.run("""
         You are a data analysis agent. Follow these instructions EXACTLY:
         1. Load the data from the given `source_file` ONLY. DO NOT create your OWN DATA.
         3. Save all figures to `./figures` as PNG using matplotlib or seaborn.
         4. Use only authorized imports: `pandas`, `numpy`, `matplotlib.pyplot`, `seaborn`, `json`.
         5. DO NOT return any explanations, thoughts, or narration outside the final JSON block
+        6. Run only 5 iteration and return output quickly.
         7. Output ONLY the following JSON code block format, exactly:
         {
             'observations': {
                 ...
             }
         }
+    """, additional_args={"additional_notes": additional_notes, "source_file": cleaned_csv_path})
     # Parse output
     parsed_result = extract_json_from_codeagent_output(raw_output) or {