pavanmutha committed on
Commit
a175c5f
·
verified ·
1 Parent(s): fcdbea4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -10
app.py CHANGED
@@ -132,16 +132,16 @@ def analyze_data(csv_file, additional_notes=""):
132
  process = psutil.Process(os.getpid())
133
  initial_memory = process.memory_info().rss / 1024 ** 2
134
 
135
- # Load and clean the data BEFORE passing to the agent
136
  try:
137
  df = pd.read_csv(csv_file)
138
  df = clean_data(df)
139
  except Exception as e:
140
  return f"<p style='color:red'><b>Error loading or cleaning CSV:</b> {e}</p>", []
141
 
142
- # Save cleaned data to a temporary file
143
- tmp_cleaned = tempfile.NamedTemporaryFile(delete=False, suffix=".csv", mode='w')
144
- df.to_csv(tmp_cleaned.name, index=False)
145
 
146
  # Clear or create figures folder
147
  if os.path.exists('./figures'):
@@ -153,17 +153,19 @@ def analyze_data(csv_file, additional_notes=""):
153
  run = wandb.init(project="huggingface-data-analysis", config={
154
  "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
155
  "additional_notes": additional_notes,
156
- "source_file": tmp_cleaned.name
157
  })
158
 
159
- # Initialize agent
160
  agent = CodeAgent(
161
  tools=[],
162
  model=model,
163
- additional_authorized_imports=["numpy", "pandas", "matplotlib.pyplot", "seaborn", "sklearn", "json"]
 
 
164
  )
165
 
166
- # Run the agent on the cleaned file
167
  raw_output = agent.run("""
168
  You are a data analysis agent. Follow these instructions EXACTLY:
169
  1. Load the data from the given `source_file` ONLY. DO NOT create your OWN DATA.
@@ -171,7 +173,7 @@ def analyze_data(csv_file, additional_notes=""):
171
  3. Save all figures to `./figures` as PNG using matplotlib or seaborn.
172
  4. Use only authorized imports: `pandas`, `numpy`, `matplotlib.pyplot`, `seaborn`, `json`.
173
  5. DO NOT return any explanations, thoughts, or narration outside the final JSON block
174
- 6. Run agent efficiently and remove repetitive task and complete in less than 40 seconds.
175
  7. Output ONLY the following JSON code block format, exactly:
176
  {
177
  'observations': {
@@ -183,7 +185,10 @@ def analyze_data(csv_file, additional_notes=""):
183
  ...
184
  }
185
  }
186
- """, additional_args={"additional_notes": additional_notes, "source_file": tmp_cleaned})
 
 
 
187
 
188
  # Parse output
189
  parsed_result = extract_json_from_codeagent_output(raw_output) or {
 
132
  process = psutil.Process(os.getpid())
133
  initial_memory = process.memory_info().rss / 1024 ** 2
134
 
135
+ # Clean the uploaded CSV file
136
  try:
137
  df = pd.read_csv(csv_file)
138
  df = clean_data(df)
139
  except Exception as e:
140
  return f"<p style='color:red'><b>Error loading or cleaning CSV:</b> {e}</p>", []
141
 
142
+ # Save cleaned CSV to disk (using a stable location)
143
+ cleaned_csv_path = "./cleaned_data.csv"
144
+ df.to_csv(cleaned_csv_path, index=False)
145
 
146
  # Clear or create figures folder
147
  if os.path.exists('./figures'):
 
153
  run = wandb.init(project="huggingface-data-analysis", config={
154
  "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
155
  "additional_notes": additional_notes,
156
+ "source_file": cleaned_csv_path
157
  })
158
 
159
+ # CodeAgent instance
160
  agent = CodeAgent(
161
  tools=[],
162
  model=model,
163
+ additional_authorized_imports=[
164
+ "numpy", "pandas", "matplotlib.pyplot", "seaborn", "sklearn", "json"
165
+ ]
166
  )
167
 
168
+ # Run agent on cleaned CSV
169
  raw_output = agent.run("""
170
  You are a data analysis agent. Follow these instructions EXACTLY:
171
  1. Load the data from the given `source_file` ONLY. DO NOT create your OWN DATA.
 
173
  3. Save all figures to `./figures` as PNG using matplotlib or seaborn.
174
  4. Use only authorized imports: `pandas`, `numpy`, `matplotlib.pyplot`, `seaborn`, `json`.
175
  5. DO NOT return any explanations, thoughts, or narration outside the final JSON block
176
+ 6. Run only 5 iteration and return output quickly.
177
  7. Output ONLY the following JSON code block format, exactly:
178
  {
179
  'observations': {
 
185
  ...
186
  }
187
  }
188
+ """, additional_args={"additional_notes": additional_notes, "source_file": cleaned_csv_path})
189
+
190
+
191
+
192
 
193
  # Parse output
194
  parsed_result = extract_json_from_codeagent_output(raw_output) or {