AIDataAgentProjectFinal

Paused

App Files Files Community

pavanmutha committed on Apr 12, 2025

Commit

af78f21

verified ·

1 Parent(s): 4b4c2f9

Update app.py

Browse files

Files changed (1) hide show

app.py +103 -82

app.py CHANGED Viewed

@@ -43,88 +43,109 @@ def upload_file(file):
     df_global = df
     return df.head()
-import textwrap
-additional_notes = "Please note: Perform a comprehensive analysis including visualizations and insights."
-# Initialize the agent
-agent = CodeAgent(
-    tools=[],
-    model=model,
-    additional_authorized_imports=["numpy", "pandas", "matplotlib.pyplot", "seaborn", "os", "json"]
-)
-# Gradio Gallery and visualization output
-visual_output = gr.Gallery(label="Generated Visualizations", columns=3, height=600, object_fit="contain")
-def run_agent(_):
-    import os
-    from PIL import Image
-    os.makedirs("figures", exist_ok=True)  # Add this just before loading images
-    if df_global is None:
-        return "Please upload a file first.", []
-    # Save the dataset temporarily
-    from tempfile import NamedTemporaryFile
-    temp_file = NamedTemporaryFile(delete=False, suffix=".csv")
-    df_global.to_csv(temp_file.name, index=False)
-    temp_file.close()
-    # Prompt for the agent
-    prompt = """
-    You are an expert data analyst.
-    1. Load the provided dataset using: df = pd.read_csv(source_file)
-    2. Automatically detect numeric and categorical columns.
-    3. Perform:
-       - Basic statistics
-       - Null/duplicate checks
-       - Correlation analysis
-       - 3+ visualizations
-    4. Extract 3+ bullet-point insights.
-    5. Before saving visualizations, run:
-       import os; os.makedirs("figures", exist_ok=True)
-       Then save all figures using plt.savefig("./figures/...")
-    6. Return a JSON with:
-       - 'insights': list of insights
-       - 'figures': list of figure file paths
-    """
-    result = agent.run(prompt, additional_args={"source_file": temp_file.name})
-    # Parse and process output
-    insights = "No insights returned."
-    images = []
-    if isinstance(result, str):
-        try:
-            result = json.loads(result)
-        except Exception:
-            return "Agent returned invalid JSON.", []
-    if isinstance(result, dict):
-        raw_insights = result.get("insights", [])
-        insights = "\n".join(raw_insights) if isinstance(raw_insights, list) else str(raw_insights)
-        image_paths = result.get("figures", [])
-        print("🔍 Image paths received:", image_paths)
-        for path in image_paths:
-            if os.path.exists(path):
-                try:
-                    images.append(Image.open(path))
-                except Exception as e:
-                    print(f"⚠️ Error loading {path}: {e}")
-            else:
-                print(f"❌ File not found: {path}")
-    return insights, images

     df_global = df
     return df.head()
+# Turn the agent's result into an HTML report.
+# Returns a (report, visuals) tuple; `visuals` is passed through unchanged.
+# Whenever the report cannot be built, the first element falls back to
+# str(raw_output) instead of HTML.
+def format_analysis_report(raw_output, visuals):
+    try:
+        # A dict is used as-is; anything else is stringified and parsed as a
+        # Python literal.
+        if isinstance(raw_output, dict):
+            analysis_dict = raw_output
+        else:
+            try:
+                analysis_dict = ast.literal_eval(str(raw_output))
+            except (SyntaxError, ValueError) as e:
+                print(f"Error parsing CodeAgent output: {e}")
+                return str(raw_output), visuals  # Return raw output as string
+        # Missing 'observations' / 'insights' keys default to empty dicts, so
+        # the corresponding section is rendered without entries.
+        report = f"""
+        <div style="font-family: Arial, sans-serif; padding: 20px; color: #333;">
+            <h1 style="color: #2B547E; border-bottom: 2px solid #2B547E; padding-bottom: 10px;">📊 Data Analysis Report</h1>
+            <div style="margin-top: 25px; background: #f8f9fa; padding: 20px; border-radius: 8px;">
+                <h2 style="color: #2B547E;">🔍 Key Observations</h2>
+                {format_observations(analysis_dict.get('observations', {}))}
+            </div>
+            <div style="margin-top: 30px;">
+                <h2 style="color: #2B547E;">💡 Insights & Visualizations</h2>
+                {format_insights(analysis_dict.get('insights', {}), visuals)}
+            </div>
+        </div>
+        """
+        return report, visuals
+    # Catch-all: also reached when the parsed literal is not a dict (no .get)
+    # or when one of the formatting helpers raises. The error is only printed.
+    except Exception as e:
+        print(f"Error in format_analysis_report: {e}")
+        return str(raw_output), visuals
+# Render observations as HTML cards, joined with newlines.
+# `observations` must provide .items(). Only entries whose key contains
+# 'proportions' are rendered; every other entry is dropped. An empty mapping
+# yields ''.
+# Card title: the key with underscores replaced by spaces, title-cased.
+# Card body: the value inside a <pre> element.
+# NOTE(review): the example keys in the agent prompt ('observation_1_key', ...)
+# do not contain 'proportions', so such entries would be filtered out here;
+# confirm the filter is intended.
+# NOTE(review): key and value are interpolated into the markup without HTML
+# escaping.
+def format_observations(observations):
+    return '\n'.join([
+        f"""
+        <div style="margin: 15px 0; padding: 15px; background: white; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.05);">
+            <h3 style="margin: 0 0 10px 0; color: #4A708B;">{key.replace('_', ' ').title()}</h3>
+            <pre style="margin: 0; padding: 10px; background: #f8f9fa; border-radius: 4px;">{value}</pre>
+        </div>
+        """ for key, value in observations.items() if 'proportions' in key
+    ])
+# Render insights as numbered HTML cards, joined with newlines.
+# `insights` must provide .items(); its keys are unpacked but never used.
+# Each card shows a 1-based badge (idx+1) and the insight text.
+# The idx-th insight is paired with visuals[idx] purely by position: an <img>
+# whose src is "/file=" followed by that path is emitted only while
+# idx < len(visuals); later insights get no image, and visuals beyond the
+# number of insights are not shown.
+# NOTE(review): the insight text and the image path are interpolated into the
+# markup without HTML escaping.
+def format_insights(insights, visuals):
+    return '\n'.join([
+        f"""
+        <div style="margin: 20px 0; padding: 20px; background: white; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.05);">
+            <div style="display: flex; align-items: center; gap: 10px;">
+                <div style="background: #2B547E; color: white; width: 30px; height: 30px; border-radius: 50%; display: flex; align-items: center; justify-content: center;">{idx+1}</div>
+                <p style="margin: 0; font-size: 16px;">{insight}</p>
+            </div>
+            {f'<img src="/file={visuals[idx]}" style="max-width: 100%; height: auto; margin-top: 10px; border-radius: 6px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">' if idx < len(visuals) else ''}
+        </div>
+        """ for idx, (key, insight) in enumerate(insights.items())
+    ])
+# Run the CodeAgent analysis, log metrics and figures to Weights & Biases,
+# and return the (report, visuals) tuple from format_analysis_report.
+#   csv_file: its .name is recorded in the W&B config when csv_file is truthy
+#       (None is recorded otherwise); the object itself is handed to the agent
+#       as `source_file`.
+#   additional_notes: recorded in the W&B config and handed to the agent as
+#       an additional argument; it is not interpolated into the prompt text.
+# Side effects: deletes and recreates ./figures, logs in to W&B, creates and
+# finishes one W&B run.
+def analyze_data(csv_file, additional_notes=""):
+    # Baselines for the metrics logged below. The timer starts here, so the
+    # reported execution time includes the setup (figures reset, W&B
+    # login/init), not only the agent call.
+    start_time = time.time()
+    process = psutil.Process(os.getpid())
+    # rss / 1024 ** 2: resident set size scaled by 2**20 (bytes -> MiB, given
+    # that psutil reports rss in bytes).
+    initial_memory = process.memory_info().rss / 1024 ** 2
+    # Start from an empty ./figures so that only images produced by this call
+    # are collected afterwards.
+    if os.path.exists('./figures'):
+        shutil.rmtree('./figures')
+    os.makedirs('./figures', exist_ok=True)
+    # The key is None when WANDB_API_KEY is not set in the environment.
+    wandb.login(key=os.environ.get('WANDB_API_KEY'))
+    # NOTE(review): the "model" config value is a hard-coded string, while the
+    # agent below uses the module-level `model`; confirm they match.
+    run = wandb.init(project="huggingface-data-analysis", config={
+        "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+        "additional_notes": additional_notes,
+        "source_file": csv_file.name if csv_file else None
+    })
+    agent = CodeAgent(tools=[], model=model, additional_authorized_imports=["numpy", "pandas", "matplotlib.pyplot", "seaborn", "sklearn"])
+    # NOTE(review): the W&B config above records csv_file.name, but the agent
+    # receives csv_file itself as `source_file`; confirm which form it expects.
+    analysis_result = agent.run("""
+        You are an expert data analyst. Perform comprehensive analysis including:
+        1. Basic statistics and data quality checks
+        2. 3 insightful analytical questions about relationships in the data
+        3. Visualization of key patterns and correlations
+        4. Actionable real-world insights derived from findings.
+        Generate publication-quality visualizations and save to './figures/'.
+        Return the analysis results as a python dictionary that can be parsed by ast.literal_eval().
+        The dictionary should have the following structure:
+        {
+            'observations': {
+                'observation_1_key': 'observation_1_value',
+                'observation_2_key': 'observation_2_value',
+                ...
+            },
+            'insights': {
+                'insight_1_key': 'insight_1_value',
+                'insight_2_key': 'insight_2_value',
+                ...
+            }
+        }
+    """, additional_args={"additional_notes": additional_notes, "source_file": csv_file})
+    # Wall-clock time since function entry and the change in RSS over the call.
+    execution_time = time.time() - start_time
+    final_memory = process.memory_info().rss / 1024 ** 2
+    memory_usage = final_memory - initial_memory
+    wandb.log({"execution_time_sec": execution_time, "memory_usage_mb": memory_usage})
+    # Collect the .png/.jpg/.jpeg files found directly in ./figures.
+    # NOTE(review): os.listdir returns entries in arbitrary order, and
+    # format_insights pairs images with insights by list position, so the
+    # pairing is not guaranteed to be stable.
+    visuals = [os.path.join('./figures', f) for f in os.listdir('./figures') if f.endswith(('.png', '.jpg', '.jpeg'))]
+    # Each image is logged under its file name as the W&B key.
+    for viz in visuals:
+        wandb.log({os.path.basename(viz): wandb.Image(viz)})
+    # NOTE(review): finish() is not in a finally block, so it is skipped if
+    # anything above raises.
+    run.finish()
+    return format_analysis_report(analysis_result, visuals)