AIDataAgentProjectFinal

Paused

App Files Files Community

pavanmutha committed on Apr 18, 2025

Commit

0a2c5ae

verified ·

1 Parent(s): e989ad4

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -46

app.py CHANGED Viewed

@@ -95,84 +95,89 @@ def extract_json_from_codeagent_output(raw_output):
     return {"error": "Failed to extract structured JSON"}
 def analyze_data(csv_file, additional_notes=""):
     start_time = time.time()
     process = psutil.Process(os.getpid())
     initial_memory = process.memory_info().rss / 1024 ** 2
-    # Load and trim dataset
-    df = pd.read_csv(csv_file)
-    df_trimmed = df.iloc[:300, :10]  # Limit rows and columns for performance
-    temp_path = tempfile.NamedTemporaryFile(delete=False, suffix=".csv").name
-    df_trimmed.to_csv(temp_path, index=False)
-    # Clear figures
     if os.path.exists('./figures'):
         shutil.rmtree('./figures')
     os.makedirs('./figures', exist_ok=True)
-    # Start W&B
     wandb.login(key=os.environ.get('WANDB_API_KEY'))
     run = wandb.init(project="huggingface-data-analysis", config={
         "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
         "additional_notes": additional_notes,
-        "source_file": csv_file.name
     })
-    # Create CodeAgent instance
     agent = CodeAgent(
         tools=[],
-        model=model,
         additional_authorized_imports=["numpy", "pandas", "matplotlib.pyplot", "seaborn", "sklearn", "json"]
     )
-    prompt = f"""
         You are a helpful data analysis agent. Follow these instructions EXACTLY:
-        1. Load the data from `source_file` ONLY.
-        2. Generate up to 3 observations and 3 visualizations.
-        3. Save all figures to ./figures as PNGs using matplotlib/seaborn.
-        4. Use only: pandas, numpy, matplotlib.pyplot, seaborn, json.
-        5. ⚠️ Output ONLY the following JSON format inside a single code block:
-        {{
-            "observations": {{
-                "key": "value"
-            }},
-            "insights": {{
-                "key": "value"
-            }}
-        }}
-        6. Do not include comments or narration.
-        7. Complete the analysis quickly (limit iterations).
-    """
-    try:
-        raw_output = agent.run(prompt, additional_args={
-            "source_file": open(temp_path, "rb"),
-            "additional_notes": additional_notes
-        })
-        parsed_result = extract_json_from_codeagent_output(raw_output)
-    except Exception as e:
-        print(f"[analyze_data] Agent failed: {e}")
-        parsed_result = {"error": str(e)}
-    # Log performance
     execution_time = time.time() - start_time
     final_memory = process.memory_info().rss / 1024 ** 2
     memory_usage = final_memory - initial_memory
-    wandb.log({"execution_time_sec": execution_time, "memory_usage_mb": memory_usage})
-    # Upload visuals
     visuals = [os.path.join('./figures', f) for f in os.listdir('./figures') if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
     for viz in visuals:
         wandb.log({os.path.basename(viz): wandb.Image(viz)})
     run.finish()
-    return {
-        "summary": parsed_result,
-        "visuals": visuals,
-        "execution_time_sec": round(execution_time, 2),
-        "memory_usage_mb": round(memory_usage, 2)
-    }

     return {"error": "Failed to extract structured JSON"}
 def _build_summary_html(parsed_result):
     """Render the parsed agent result as an HTML fragment.

     Every key and value originates from model output, so all of it is
     HTML-escaped before being interpolated into the markup.

     Args:
         parsed_result: Dict that may contain "observations", "insights"
             and/or "error" entries.

     Returns:
         The HTML string for the summary panel.
     """
     import html

     def esc(value):
         return html.escape(str(value))

     parts = ["<h3>📊 Data Analysis Summary</h3>"]
     sections = (
         ("observations", "<h4>🔍 Observations</h4>"),
         ("insights", "<h4>💡 Insights</h4>"),
     )
     for key, heading in sections:
         if key not in parsed_result:
             continue
         entries = parsed_result[key]
         if isinstance(entries, dict):
             items = [
                 f"<li><b>{esc(k)}:</b> {esc(v)}</li>"
                 for k, v in entries.items()
             ]
         else:
             # The model is not guaranteed to follow the requested schema;
             # render a list (or a lone value) as unlabeled items instead of
             # failing on the missing .items().
             values = entries if isinstance(entries, (list, tuple)) else [entries]
             items = [f"<li>{esc(v)}</li>" for v in values]
         parts.append(heading + "<ul>" + "".join(items) + "</ul>")
     if "error" in parsed_result:
         parts.append(
             f"<p style='color:red'><b>Error:</b> {esc(parsed_result['error'])}</p>"
         )
     return "".join(parts)


 def analyze_data(csv_file, additional_notes=""):
     """Run the CodeAgent over an uploaded CSV and summarize the result.

     Side effects: deletes and recreates ``./figures``, logs in to W&B with
     the ``WANDB_API_KEY`` environment variable, and logs timing, memory and
     every generated image to a W&B run, which is always finished before
     returning or raising.

     Args:
         csv_file: Uploaded file object handed to the agent as
             ``source_file``; its ``.name`` is recorded in the W&B config
             (``None`` is recorded when ``csv_file`` is falsy).
         additional_notes: Free-text notes forwarded to the agent and stored
             in the W&B config.

     Returns:
         A ``(summary_html, visuals)`` tuple: the HTML summary string and the
         list of image paths found under ``./figures``.
     """
     import os
     import shutil
     import time

     import psutil

     start_time = time.time()
     process = psutil.Process(os.getpid())
     initial_memory = process.memory_info().rss / 1024 ** 2  # RSS in MiB
     # Clear or create figures folder
     if os.path.exists('./figures'):
         shutil.rmtree('./figures')
     os.makedirs('./figures', exist_ok=True)
     wandb.login(key=os.environ.get('WANDB_API_KEY'))
     run = wandb.init(project="huggingface-data-analysis", config={
         "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
         "additional_notes": additional_notes,
         "source_file": csv_file.name if csv_file else None
     })
     prompt = """
         You are a helpful data analysis agent. Follow these instructions EXACTLY:
         1. Load the data from the given `source_file` ONLY.
         2. Analyze the data structure and generate up to 5 visualizations and 5 insights.
         3. Save all figures to `./figures` as PNG using matplotlib or seaborn.
         4. Use only authorized imports: `pandas`, `numpy`, `matplotlib.pyplot`, `seaborn`, `json`.
         5. DO NOT return any explanations, thoughts, or narration outside the final output block.
         6. Run only 5 iteration and return output quickly.
         ⚠️ Output ONLY the following code block format, exactly:
         {
             'observations': {
                 'observation_1_key': 'observation_1_value',
                 ...
             },
             'insights': {
                 'insight_1_key': 'insight_1_value',
                 ...
             }
         }
     """
     # try/finally guarantees the W&B run is closed even when something below
     # raises, so a failed request does not leave a dangling run behind.
     try:
         agent = CodeAgent(
             tools=[],
             model=model,
             additional_authorized_imports=["numpy", "pandas", "matplotlib.pyplot", "seaborn", "sklearn", "json"]
         )
         # Run the CodeAgent; a failure is reported through the "error" entry
         # of the summary instead of aborting the whole request.
         try:
             raw_output = agent.run(prompt, additional_args={
                 "additional_notes": additional_notes,
                 "source_file": csv_file,
             })
             parsed_result = extract_json_from_codeagent_output(raw_output)
         except Exception as exc:
             print(f"[analyze_data] Agent failed: {exc}")
             parsed_result = {"error": str(exc)}
         # Anything empty or not dict-shaped cannot be rendered as a summary.
         if not parsed_result or not isinstance(parsed_result, dict):
             parsed_result = {"error": "Failed to extract structured JSON"}
         execution_time = time.time() - start_time
         final_memory = process.memory_info().rss / 1024 ** 2
         memory_usage = final_memory - initial_memory
         wandb.log({
             "execution_time_sec": round(execution_time, 2),
             "memory_usage_mb": round(memory_usage, 2)
         })
         # Collect generated visualizations; sorted so the gallery order does
         # not depend on the filesystem's directory listing order.
         visuals = [
             os.path.join('./figures', f)
             for f in sorted(os.listdir('./figures'))
             if f.lower().endswith(('.png', '.jpg', '.jpeg'))
         ]
         for viz in visuals:
             wandb.log({os.path.basename(viz): wandb.Image(viz)})
     finally:
         run.finish()
     # Return summary HTML and visual paths for gr.HTML + gr.Gallery
     return _build_summary_html(parsed_result), visuals