Update app.py
Browse files
app.py
CHANGED
|
@@ -132,16 +132,16 @@ def analyze_data(csv_file, additional_notes=""):
|
|
| 132 |
process = psutil.Process(os.getpid())
|
| 133 |
initial_memory = process.memory_info().rss / 1024 ** 2
|
| 134 |
|
| 135 |
-
#
|
| 136 |
try:
|
| 137 |
df = pd.read_csv(csv_file)
|
| 138 |
df = clean_data(df)
|
| 139 |
except Exception as e:
|
| 140 |
return f"<p style='color:red'><b>Error loading or cleaning CSV:</b> {e}</p>", []
|
| 141 |
|
| 142 |
-
# Save cleaned
|
| 143 |
-
|
| 144 |
-
df.to_csv(
|
| 145 |
|
| 146 |
# Clear or create figures folder
|
| 147 |
if os.path.exists('./figures'):
|
|
@@ -153,17 +153,19 @@ def analyze_data(csv_file, additional_notes=""):
|
|
| 153 |
run = wandb.init(project="huggingface-data-analysis", config={
|
| 154 |
"model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
|
| 155 |
"additional_notes": additional_notes,
|
| 156 |
-
"source_file":
|
| 157 |
})
|
| 158 |
|
| 159 |
-
#
|
| 160 |
agent = CodeAgent(
|
| 161 |
tools=[],
|
| 162 |
model=model,
|
| 163 |
-
additional_authorized_imports=[
|
|
|
|
|
|
|
| 164 |
)
|
| 165 |
|
| 166 |
-
# Run
|
| 167 |
raw_output = agent.run("""
|
| 168 |
You are a data analysis agent. Follow these instructions EXACTLY:
|
| 169 |
1. Load the data from the given `source_file` ONLY. DO NOT create your OWN DATA.
|
|
@@ -171,7 +173,7 @@ def analyze_data(csv_file, additional_notes=""):
|
|
| 171 |
3. Save all figures to `./figures` as PNG using matplotlib or seaborn.
|
| 172 |
4. Use only authorized imports: `pandas`, `numpy`, `matplotlib.pyplot`, `seaborn`, `json`.
|
| 173 |
5. DO NOT return any explanations, thoughts, or narration outside the final JSON block
|
| 174 |
-
6. Run
|
| 175 |
7. Output ONLY the following JSON code block format, exactly:
|
| 176 |
{
|
| 177 |
'observations': {
|
|
@@ -183,7 +185,10 @@ def analyze_data(csv_file, additional_notes=""):
|
|
| 183 |
...
|
| 184 |
}
|
| 185 |
}
|
| 186 |
-
""", additional_args={"additional_notes": additional_notes, "source_file":
|
|
|
|
|
|
|
|
|
|
| 187 |
|
| 188 |
# Parse output
|
| 189 |
parsed_result = extract_json_from_codeagent_output(raw_output) or {
|
|
|
|
| 132 |
process = psutil.Process(os.getpid())
|
| 133 |
initial_memory = process.memory_info().rss / 1024 ** 2
|
| 134 |
|
| 135 |
+
# Load the uploaded CSV file and clean it
|
| 136 |
try:
|
| 137 |
df = pd.read_csv(csv_file)
|
| 138 |
df = clean_data(df)
|
| 139 |
except Exception as e:
|
| 140 |
return f"<p style='color:red'><b>Error loading or cleaning CSV:</b> {e}</p>", []
|
| 141 |
|
| 142 |
+
# Save cleaned CSV to disk (using a stable location)
|
| 143 |
+
cleaned_csv_path = "./cleaned_data.csv"
|
| 144 |
+
df.to_csv(cleaned_csv_path, index=False)
|
| 145 |
|
| 146 |
# Clear or create figures folder
|
| 147 |
if os.path.exists('./figures'):
|
|
|
|
| 153 |
run = wandb.init(project="huggingface-data-analysis", config={
|
| 154 |
"model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
|
| 155 |
"additional_notes": additional_notes,
|
| 156 |
+
"source_file": cleaned_csv_path
|
| 157 |
})
|
| 158 |
|
| 159 |
+
# CodeAgent instance
|
| 160 |
agent = CodeAgent(
|
| 161 |
tools=[],
|
| 162 |
model=model,
|
| 163 |
+
additional_authorized_imports=[
|
| 164 |
+
"numpy", "pandas", "matplotlib.pyplot", "seaborn", "sklearn", "json"
|
| 165 |
+
]
|
| 166 |
)
|
| 167 |
|
| 168 |
+
# Run agent on cleaned CSV
|
| 169 |
raw_output = agent.run("""
|
| 170 |
You are a data analysis agent. Follow these instructions EXACTLY:
|
| 171 |
1. Load the data from the given `source_file` ONLY. DO NOT create your OWN DATA.
|
|
|
|
| 173 |
3. Save all figures to `./figures` as PNG using matplotlib or seaborn.
|
| 174 |
4. Use only authorized imports: `pandas`, `numpy`, `matplotlib.pyplot`, `seaborn`, `json`.
|
| 175 |
5. DO NOT return any explanations, thoughts, or narration outside the final JSON block
|
| 176 |
+
6. Run only 5 iteration and return output quickly.
|
| 177 |
7. Output ONLY the following JSON code block format, exactly:
|
| 178 |
{
|
| 179 |
'observations': {
|
|
|
|
| 185 |
...
|
| 186 |
}
|
| 187 |
}
|
| 188 |
+
""", additional_args={"additional_notes": additional_notes, "source_file": cleaned_csv_path})
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
|
| 192 |
|
| 193 |
# Parse output
|
| 194 |
parsed_result = extract_json_from_codeagent_output(raw_output) or {
|