AIDataAgentProjectFinal

Paused

App Files Community

pavanmutha committed on Apr 17, 2025

Commit

48a7c51

verified ·

1 Parent(s): 9fa587f

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -39

app.py CHANGED Viewed

@@ -60,16 +60,18 @@ def set_target_column(col_name):
 def format_analysis_report(raw_output, visuals):
     try:
         if isinstance(raw_output, dict):
             analysis_dict = raw_output
         else:
             try:
-                analysis_dict = ast.literal_eval(str(raw_output))
-            except (SyntaxError, ValueError) as e:
                 print(f"Error parsing CodeAgent output: {e}")
-                return str(raw_output), visuals  # Return raw output as string
         report = f"""
         <div style="font-family: Arial, sans-serif; padding: 20px; color: #333;">
             <h1 style="color: #2B547E; border-bottom: 2px solid #2B547E; padding-bottom: 10px;">📊 Data Analysis Report</h1>
@@ -84,9 +86,11 @@ def format_analysis_report(raw_output, visuals):
         </div>
         """
         return report, visuals
     except Exception as e:
         print(f"Error in format_analysis_report: {e}")
-        return str(raw_output), visuals
 def format_observations(observations):
     return '\n'.join([
@@ -95,7 +99,7 @@ def format_observations(observations):
             <h3 style="margin: 0 0 10px 0; color: #4A708B;">{key.replace('_', ' ').title()}</h3>
             <pre style="margin: 0; padding: 10px; background: #f8f9fa; border-radius: 4px;">{value}</pre>
         </div>
-        """ for key, value in observations.items() if 'proportions' in key
     ])
 def format_insights(insights, visuals):
@@ -115,54 +119,75 @@ def analyze_data(csv_file, additional_notes=""):
     start_time = time.time()
     process = psutil.Process(os.getpid())
     initial_memory = process.memory_info().rss / 1024 ** 2
     if os.path.exists('./figures'):
         shutil.rmtree('./figures')
     os.makedirs('./figures', exist_ok=True)
     wandb.login(key=os.environ.get('WANDB_API_KEY'))
     run = wandb.init(project="huggingface-data-analysis", config={
         "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
         "additional_notes": additional_notes,
         "source_file": csv_file.name if csv_file else None
     })
-    agent = CodeAgent(tools=[], model=model, additional_authorized_imports=["numpy", "pandas", "matplotlib.pyplot", "seaborn", "sklearn", "json"])
-    analysis_result = agent.run("""
-        You are a data analysis agent. Just return insight information and visualization and follow following instruction strictly.
-        1. Load the data from provide source_file.Do not create your own.
-        2. Detect numeric and categorical columns.
-        3. Generate at least 5 visualizations and 5 insights.
-        4. Generate publication-quality visualizations and Save all plots to `./figures/` as PNGs using matplotlib or seaborn.
-        5. Do not use 'open()' or write to files. Just return variables and plots.
-        6. Return insights in this exact Python dictionary structure:
-        The dictionary should have the following structure:
-        {
-            'observations': {
-                'observation_1_key': 'Brief, clear observation.',
-                'observation_2_key': 'Another brief point.',
-                ...
-            },
-            'insights': {
-                'insight_1_key': 'insight_1_value',
-                'insight_2_key': 'insight_2_value',
-                ...
-            }
-        }
-    DO NOT use open(), DO NOT print. Only return this dictionary object as your final output in a code block.
-    Be concise. Do not include text explanations outside the code block.
-    """, additional_args={"additional_notes": additional_notes, "source_file": csv_file})
     execution_time = time.time() - start_time
     final_memory = process.memory_info().rss / 1024 ** 2
     memory_usage = final_memory - initial_memory
-    wandb.log({"execution_time_sec": execution_time, "memory_usage_mb": memory_usage})
-    visuals = [os.path.join('./figures', f) for f in os.listdir('./figures') if f.endswith(('.png', '.jpg', '.jpeg'))]
     for viz in visuals:
         wandb.log({os.path.basename(viz): wandb.Image(viz)})
     run.finish()
     return format_analysis_report(analysis_result, visuals)

 def format_analysis_report(raw_output, visuals):
+    import json
     try:
         if isinstance(raw_output, dict):
             analysis_dict = raw_output
         else:
             try:
+                analysis_dict = json.loads(str(raw_output))
+            except (json.JSONDecodeError, TypeError) as e:
                 print(f"Error parsing CodeAgent output: {e}")
+                return f"<pre>{str(raw_output)}</pre>", visuals
         report = f"""
         <div style="font-family: Arial, sans-serif; padding: 20px; color: #333;">
             <h1 style="color: #2B547E; border-bottom: 2px solid #2B547E; padding-bottom: 10px;">📊 Data Analysis Report</h1>
         </div>
         """
         return report, visuals
     except Exception as e:
         print(f"Error in format_analysis_report: {e}")
+        return f"<pre>{str(raw_output)}</pre>", visuals
 def format_observations(observations):
     return '\n'.join([
             <h3 style="margin: 0 0 10px 0; color: #4A708B;">{key.replace('_', ' ').title()}</h3>
             <pre style="margin: 0; padding: 10px; background: #f8f9fa; border-radius: 4px;">{value}</pre>
         </div>
+        """ for key, value in observations.items()
     ])
 def format_insights(insights, visuals):
     start_time = time.time()
     process = psutil.Process(os.getpid())
     initial_memory = process.memory_info().rss / 1024 ** 2
     if os.path.exists('./figures'):
         shutil.rmtree('./figures')
     os.makedirs('./figures', exist_ok=True)
     wandb.login(key=os.environ.get('WANDB_API_KEY'))
     run = wandb.init(project="huggingface-data-analysis", config={
         "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
         "additional_notes": additional_notes,
         "source_file": csv_file.name if csv_file else None
     })
+    agent = CodeAgent(
+        tools=[],
+        model=model,
+        additional_authorized_imports=["numpy", "pandas", "matplotlib.pyplot", "seaborn", "sklearn", "json"]
+    )
+    prompt = """
+You are a helpful data analysis agent. Please follow these strict instructions:
+1. Load the data from the provided `source_file`.
+2. Detect numeric and categorical columns.
+3. Generate at least 5 visualizations and 5 insights.
+4. Save all plots to `./figures/` as PNGs using matplotlib or seaborn.
+5. DO NOT use open() or print() statements.
+6. Return your final result as a JSON-formatted Python string using `json.dumps()` like this:
+```py
+import json
+result = {
+    "observations": {
+        "numeric_summary": "3 numeric columns found: Revenue, Boxes, Sales",
+        "missing_data": "No missing values."
+    },
+    "insights": {
+        "top_country": "Australia had the highest total sales.",
+        "monthly_peak": "June had the highest sales volume."
+    }
+}
+json.dumps(result)
+```<end_code>
+Be concise and avoid any narrative outside this final dictionary.
+"""
+    analysis_result = agent.run(prompt, additional_args={
+        "additional_notes": additional_notes,
+        "source_file": csv_file
+    })
     execution_time = time.time() - start_time
     final_memory = process.memory_info().rss / 1024 ** 2
     memory_usage = final_memory - initial_memory
+    wandb.log({
+        "execution_time_sec": execution_time,
+        "memory_usage_mb": memory_usage
+    })
+    visuals = sorted([
+        os.path.join('./figures', f) for f in os.listdir('./figures')
+        if f.endswith(('.png', '.jpg', '.jpeg'))
+    ])
     for viz in visuals:
         wandb.log({os.path.basename(viz): wandb.Image(viz)})
     run.finish()
     return format_analysis_report(analysis_result, visuals)