pavanmutha committed on
Commit
0a2c5ae
·
verified ·
1 Parent(s): e989ad4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -46
app.py CHANGED
@@ -95,84 +95,89 @@ def extract_json_from_codeagent_output(raw_output):
95
  return {"error": "Failed to extract structured JSON"}
96
 
97
  def analyze_data(csv_file, additional_notes=""):
 
 
 
98
  start_time = time.time()
99
  process = psutil.Process(os.getpid())
100
  initial_memory = process.memory_info().rss / 1024 ** 2
101
 
102
- # Load and trim dataset
103
- df = pd.read_csv(csv_file)
104
- df_trimmed = df.iloc[:300, :10] # Limit rows and columns for performance
105
- temp_path = tempfile.NamedTemporaryFile(delete=False, suffix=".csv").name
106
- df_trimmed.to_csv(temp_path, index=False)
107
-
108
- # Clear figures
109
  if os.path.exists('./figures'):
110
  shutil.rmtree('./figures')
111
  os.makedirs('./figures', exist_ok=True)
112
 
113
- # Start W&B
114
  wandb.login(key=os.environ.get('WANDB_API_KEY'))
115
  run = wandb.init(project="huggingface-data-analysis", config={
116
  "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
117
  "additional_notes": additional_notes,
118
- "source_file": csv_file.name
119
  })
120
 
121
- # Create CodeAgent instance
122
  agent = CodeAgent(
123
  tools=[],
124
- model=model,
125
  additional_authorized_imports=["numpy", "pandas", "matplotlib.pyplot", "seaborn", "sklearn", "json"]
126
  )
127
 
128
- prompt = f"""
 
129
  You are a helpful data analysis agent. Follow these instructions EXACTLY:
130
- 1. Load the data from `source_file` ONLY.
131
- 2. Generate up to 3 observations and 3 visualizations.
132
- 3. Save all figures to ./figures as PNGs using matplotlib/seaborn.
133
- 4. Use only: pandas, numpy, matplotlib.pyplot, seaborn, json.
134
- 5. ⚠️ Output ONLY the following JSON format inside a single code block:
135
- {{
136
- "observations": {{
137
- "key": "value"
138
- }},
139
- "insights": {{
140
- "key": "value"
141
- }}
142
- }}
143
- 6. Do not include comments or narration.
144
- 7. Complete the analysis quickly (limit iterations).
145
- """
 
 
146
 
147
- try:
148
- raw_output = agent.run(prompt, additional_args={
149
- "source_file": open(temp_path, "rb"),
150
- "additional_notes": additional_notes
151
- })
152
- parsed_result = extract_json_from_codeagent_output(raw_output)
153
- except Exception as e:
154
- print(f"[analyze_data] Agent failed: {e}")
155
- parsed_result = {"error": str(e)}
156
 
157
- # Log performance
158
  execution_time = time.time() - start_time
159
  final_memory = process.memory_info().rss / 1024 ** 2
160
  memory_usage = final_memory - initial_memory
161
- wandb.log({"execution_time_sec": execution_time, "memory_usage_mb": memory_usage})
162
 
163
- # Upload visuals
 
 
 
 
 
164
  visuals = [os.path.join('./figures', f) for f in os.listdir('./figures') if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
165
  for viz in visuals:
166
  wandb.log({os.path.basename(viz): wandb.Image(viz)})
167
 
168
  run.finish()
169
 
170
- return {
171
- "summary": parsed_result,
172
- "visuals": visuals,
173
- "execution_time_sec": round(execution_time, 2),
174
- "memory_usage_mb": round(memory_usage, 2)
175
- }
 
 
 
 
 
 
 
 
 
176
 
177
 
178
 
 
95
  return {"error": "Failed to extract structured JSON"}
96
 
97
  def analyze_data(csv_file, additional_notes=""):
98
+ import time, os, shutil, psutil, json
99
+ from pathlib import Path
100
+
101
  start_time = time.time()
102
  process = psutil.Process(os.getpid())
103
  initial_memory = process.memory_info().rss / 1024 ** 2
104
 
105
+ # Clear or create figures folder
 
 
 
 
 
 
106
  if os.path.exists('./figures'):
107
  shutil.rmtree('./figures')
108
  os.makedirs('./figures', exist_ok=True)
109
 
 
110
  wandb.login(key=os.environ.get('WANDB_API_KEY'))
111
  run = wandb.init(project="huggingface-data-analysis", config={
112
  "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
113
  "additional_notes": additional_notes,
114
+ "source_file": csv_file.name if csv_file else None
115
  })
116
 
 
117
  agent = CodeAgent(
118
  tools=[],
119
+ model=model,
120
  additional_authorized_imports=["numpy", "pandas", "matplotlib.pyplot", "seaborn", "sklearn", "json"]
121
  )
122
 
123
+ # Run the CodeAgent
124
+ raw_output = agent.run("""
125
  You are a helpful data analysis agent. Follow these instructions EXACTLY:
126
+ 1. Load the data from the given `source_file` ONLY.
127
+ 2. Analyze the data structure and generate up to 5 visualizations and 5 insights.
128
+ 3. Save all figures to `./figures` as PNG using matplotlib or seaborn.
129
+ 4. Use only authorized imports: `pandas`, `numpy`, `matplotlib.pyplot`, `seaborn`, `json`.
130
+ 5. DO NOT return any explanations, thoughts, or narration outside the final output block.
131
+ 6. Run only 5 iteration and return output quickly.
132
+ ⚠️ Output ONLY the following code block format, exactly:
133
+ {
134
+ 'observations': {
135
+ 'observation_1_key': 'observation_1_value',
136
+ ...
137
+ },
138
+ 'insights': {
139
+ 'insight_1_key': 'insight_1_value',
140
+ ...
141
+ }
142
+ }
143
+ """, additional_args={"additional_notes": additional_notes, "source_file": csv_file})
144
 
145
+ # Parse agent output
146
+ parsed_result = extract_json_from_codeagent_output(raw_output) or {
147
+ "error": "Failed to extract structured JSON"
148
+ }
 
 
 
 
 
149
 
 
150
  execution_time = time.time() - start_time
151
  final_memory = process.memory_info().rss / 1024 ** 2
152
  memory_usage = final_memory - initial_memory
 
153
 
154
+ wandb.log({
155
+ "execution_time_sec": round(execution_time, 2),
156
+ "memory_usage_mb": round(memory_usage, 2)
157
+ })
158
+
159
+ # Collect generated visualizations
160
  visuals = [os.path.join('./figures', f) for f in os.listdir('./figures') if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
161
  for viz in visuals:
162
  wandb.log({os.path.basename(viz): wandb.Image(viz)})
163
 
164
  run.finish()
165
 
166
+ # Generate summary HTML
167
+ summary_html = "<h3>📊 Data Analysis Summary</h3>"
168
+ if "observations" in parsed_result:
169
+ summary_html += "<h4>🔍 Observations</h4><ul>" + "".join(
170
+ f"<li><b>{k}:</b> {v}</li>" for k, v in parsed_result["observations"].items()
171
+ ) + "</ul>"
172
+ if "insights" in parsed_result:
173
+ summary_html += "<h4>💡 Insights</h4><ul>" + "".join(
174
+ f"<li><b>{k}:</b> {v}</li>" for k, v in parsed_result["insights"].items()
175
+ ) + "</ul>"
176
+ if "error" in parsed_result:
177
+ summary_html += f"<p style='color:red'><b>Error:</b> {parsed_result['error']}</p>"
178
+
179
+ # Return summary HTML and visual paths for gr.HTML + gr.Gallery
180
+ return summary_html, visuals
181
 
182
 
183