pavanmutha committed on
Commit
48a7c51
·
verified ·
1 Parent(s): 9fa587f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -39
app.py CHANGED
@@ -60,16 +60,18 @@ def set_target_column(col_name):
60
 
61
 
62
  def format_analysis_report(raw_output, visuals):
 
 
63
  try:
64
  if isinstance(raw_output, dict):
65
  analysis_dict = raw_output
66
  else:
67
  try:
68
- analysis_dict = ast.literal_eval(str(raw_output))
69
- except (SyntaxError, ValueError) as e:
70
  print(f"Error parsing CodeAgent output: {e}")
71
- return str(raw_output), visuals # Return raw output as string
72
-
73
  report = f"""
74
  <div style="font-family: Arial, sans-serif; padding: 20px; color: #333;">
75
  <h1 style="color: #2B547E; border-bottom: 2px solid #2B547E; padding-bottom: 10px;">📊 Data Analysis Report</h1>
@@ -84,9 +86,11 @@ def format_analysis_report(raw_output, visuals):
84
  </div>
85
  """
86
  return report, visuals
 
87
  except Exception as e:
88
  print(f"Error in format_analysis_report: {e}")
89
- return str(raw_output), visuals
 
90
 
91
  def format_observations(observations):
92
  return '\n'.join([
@@ -95,7 +99,7 @@ def format_observations(observations):
95
  <h3 style="margin: 0 0 10px 0; color: #4A708B;">{key.replace('_', ' ').title()}</h3>
96
  <pre style="margin: 0; padding: 10px; background: #f8f9fa; border-radius: 4px;">{value}</pre>
97
  </div>
98
- """ for key, value in observations.items() if 'proportions' in key
99
  ])
100
 
101
  def format_insights(insights, visuals):
@@ -115,54 +119,75 @@ def analyze_data(csv_file, additional_notes=""):
115
  start_time = time.time()
116
  process = psutil.Process(os.getpid())
117
  initial_memory = process.memory_info().rss / 1024 ** 2
118
-
119
  if os.path.exists('./figures'):
120
  shutil.rmtree('./figures')
121
  os.makedirs('./figures', exist_ok=True)
122
-
123
  wandb.login(key=os.environ.get('WANDB_API_KEY'))
124
  run = wandb.init(project="huggingface-data-analysis", config={
125
  "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
126
  "additional_notes": additional_notes,
127
  "source_file": csv_file.name if csv_file else None
128
  })
129
-
130
- agent = CodeAgent(tools=[], model=model, additional_authorized_imports=["numpy", "pandas", "matplotlib.pyplot", "seaborn", "sklearn", "json"])
131
- analysis_result = agent.run("""
132
- You are a data analysis agent. Just return insight information and visualization and follow following instruction strictly.
133
- 1. Load the data from provide source_file.Do not create your own.
134
- 2. Detect numeric and categorical columns.
135
- 3. Generate at least 5 visualizations and 5 insights.
136
- 4. Generate publication-quality visualizations and Save all plots to `./figures/` as PNGs using matplotlib or seaborn.
137
- 5. Do not use 'open()' or write to files. Just return variables and plots.
138
- 6. Return insights in this exact Python dictionary structure:
139
- The dictionary should have the following structure:
140
- {
141
- 'observations': {
142
- 'observation_1_key': 'Brief, clear observation.',
143
- 'observation_2_key': 'Another brief point.',
144
- ...
145
- },
146
- 'insights': {
147
- 'insight_1_key': 'insight_1_value',
148
- 'insight_2_key': 'insight_2_value',
149
- ...
150
- }
151
- }
152
- DO NOT use open(), DO NOT print. Only return this dictionary object as your final output in a code block.
153
 
154
- Be concise. Do not include text explanations outside the code block.
155
- """, additional_args={"additional_notes": additional_notes, "source_file": csv_file})
156
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  execution_time = time.time() - start_time
158
  final_memory = process.memory_info().rss / 1024 ** 2
159
  memory_usage = final_memory - initial_memory
160
- wandb.log({"execution_time_sec": execution_time, "memory_usage_mb": memory_usage})
161
-
162
- visuals = [os.path.join('./figures', f) for f in os.listdir('./figures') if f.endswith(('.png', '.jpg', '.jpeg'))]
 
 
 
 
 
 
 
 
163
  for viz in visuals:
164
  wandb.log({os.path.basename(viz): wandb.Image(viz)})
165
-
166
  run.finish()
167
  return format_analysis_report(analysis_result, visuals)
168
 
 
60
 
61
 
62
  def format_analysis_report(raw_output, visuals):
63
+ import json
64
+
65
  try:
66
  if isinstance(raw_output, dict):
67
  analysis_dict = raw_output
68
  else:
69
  try:
70
+ analysis_dict = json.loads(str(raw_output))
71
+ except (json.JSONDecodeError, TypeError) as e:
72
  print(f"Error parsing CodeAgent output: {e}")
73
+ return f"<pre>{str(raw_output)}</pre>", visuals
74
+
75
  report = f"""
76
  <div style="font-family: Arial, sans-serif; padding: 20px; color: #333;">
77
  <h1 style="color: #2B547E; border-bottom: 2px solid #2B547E; padding-bottom: 10px;">📊 Data Analysis Report</h1>
 
86
  </div>
87
  """
88
  return report, visuals
89
+
90
  except Exception as e:
91
  print(f"Error in format_analysis_report: {e}")
92
+ return f"<pre>{str(raw_output)}</pre>", visuals
93
+
94
 
95
  def format_observations(observations):
96
  return '\n'.join([
 
99
  <h3 style="margin: 0 0 10px 0; color: #4A708B;">{key.replace('_', ' ').title()}</h3>
100
  <pre style="margin: 0; padding: 10px; background: #f8f9fa; border-radius: 4px;">{value}</pre>
101
  </div>
102
+ """ for key, value in observations.items()
103
  ])
104
 
105
  def format_insights(insights, visuals):
 
119
  start_time = time.time()
120
  process = psutil.Process(os.getpid())
121
  initial_memory = process.memory_info().rss / 1024 ** 2
122
+
123
  if os.path.exists('./figures'):
124
  shutil.rmtree('./figures')
125
  os.makedirs('./figures', exist_ok=True)
126
+
127
  wandb.login(key=os.environ.get('WANDB_API_KEY'))
128
  run = wandb.init(project="huggingface-data-analysis", config={
129
  "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
130
  "additional_notes": additional_notes,
131
  "source_file": csv_file.name if csv_file else None
132
  })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
 
134
+ agent = CodeAgent(
135
+ tools=[],
136
+ model=model,
137
+ additional_authorized_imports=["numpy", "pandas", "matplotlib.pyplot", "seaborn", "sklearn", "json"]
138
+ )
139
+
140
+ prompt = """
141
+ You are a helpful data analysis agent. Please follow these strict instructions:
142
+
143
+ 1. Load the data from the provided `source_file`.
144
+ 2. Detect numeric and categorical columns.
145
+ 3. Generate at least 5 visualizations and 5 insights.
146
+ 4. Save all plots to `./figures/` as PNGs using matplotlib or seaborn.
147
+ 5. DO NOT use open() or print() statements.
148
+ 6. Return your final result as a JSON-formatted Python string using `json.dumps()` like this:
149
+
150
+ ```py
151
+ import json
152
+
153
+ result = {
154
+ "observations": {
155
+ "numeric_summary": "3 numeric columns found: Revenue, Boxes, Sales",
156
+ "missing_data": "No missing values."
157
+ },
158
+ "insights": {
159
+ "top_country": "Australia had the highest total sales.",
160
+ "monthly_peak": "June had the highest sales volume."
161
+ }
162
+ }
163
+
164
+ json.dumps(result)
165
+ ```<end_code>
166
+ Be concise and avoid any narrative outside this final dictionary.
167
+ """
168
+
169
+ analysis_result = agent.run(prompt, additional_args={
170
+ "additional_notes": additional_notes,
171
+ "source_file": csv_file
172
+ })
173
+
174
  execution_time = time.time() - start_time
175
  final_memory = process.memory_info().rss / 1024 ** 2
176
  memory_usage = final_memory - initial_memory
177
+
178
+ wandb.log({
179
+ "execution_time_sec": execution_time,
180
+ "memory_usage_mb": memory_usage
181
+ })
182
+
183
+ visuals = sorted([
184
+ os.path.join('./figures', f) for f in os.listdir('./figures')
185
+ if f.endswith(('.png', '.jpg', '.jpeg'))
186
+ ])
187
+
188
  for viz in visuals:
189
  wandb.log({os.path.basename(viz): wandb.Image(viz)})
190
+
191
  run.finish()
192
  return format_analysis_report(analysis_result, visuals)
193