pavanmutha committed on
Commit
1f71b35
·
verified ·
1 Parent(s): 69a2d3a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -5
app.py CHANGED
@@ -68,10 +68,33 @@ def clean_data(df):
68
 
69
  # Extract JSON from the CodeAgent output when it is not already in the expected format
70
 
71
- import os, json, shutil, time, psutil, tempfile, re, ast
72
- import pandas as pd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  import wandb
74
 
 
75
  def extract_json_from_codeagent_output(raw_output):
76
  try:
77
  code_blocks = re.findall(r"```(?:py|python)?\n(.*?)```", raw_output, re.DOTALL)
@@ -87,6 +110,7 @@ def extract_json_from_codeagent_output(raw_output):
87
  return json.loads(match.group(1))
88
  except json.JSONDecodeError:
89
  return ast.literal_eval(match.group(1))
 
90
  fallback = re.search(r"\{[\s\S]+?\}", raw_output)
91
  if fallback:
92
  return json.loads(fallback.group(0))
@@ -95,9 +119,6 @@ def extract_json_from_codeagent_output(raw_output):
95
  return {"error": "Failed to extract structured JSON"}
96
 
97
  def analyze_data(csv_file, additional_notes=""):
98
- import time, os, shutil, psutil, json
99
- from pathlib import Path
100
-
101
  start_time = time.time()
102
  process = psutil.Process(os.getpid())
103
  initial_memory = process.memory_info().rss / 1024 ** 2
@@ -107,6 +128,7 @@ def analyze_data(csv_file, additional_notes=""):
107
  shutil.rmtree('./figures')
108
  os.makedirs('./figures', exist_ok=True)
109
 
 
110
  wandb.login(key=os.environ.get('WANDB_API_KEY'))
111
  run = wandb.init(project="huggingface-data-analysis", config={
112
  "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
@@ -147,6 +169,7 @@ def analyze_data(csv_file, additional_notes=""):
147
  "error": "Failed to extract structured JSON"
148
  }
149
 
 
150
  execution_time = time.time() - start_time
151
  final_memory = process.memory_info().rss / 1024 ** 2
152
  memory_usage = final_memory - initial_memory
 
68
 
69
  # Extract JSON from the CodeAgent output when it is not already in the expected format
70
 
71
def _parse_dict_at(text, start):
    """Parse the dict literal that begins at ``text[start]``.

    ``text[start]`` must be ``"{"``. Strict JSON is tried first; if that
    fails, the brace-balanced span is evaluated as a Python literal so that
    single-quoted dicts are accepted too.

    Returns the parsed ``dict``, or ``None`` when nothing dict-shaped can be
    parsed at that position.
    """
    import ast
    import json

    # raw_decode stops at the end of the first complete JSON value, so nested
    # objects are handled without any brace counting on our side.
    try:
        value, _ = json.JSONDecoder().raw_decode(text, start)
    except (ValueError, RecursionError):
        pass
    else:
        return value if isinstance(value, dict) else None

    # Not strict JSON: locate the matching closing brace ourselves, ignoring
    # braces that appear inside quoted strings.
    depth = 0
    quote = None
    i = start
    while i < len(text):
        ch = text[i]
        if quote is not None:
            if ch == "\\":
                i += 1  # skip the escaped character
            elif ch == quote:
                quote = None
        elif ch in "\"'":
            quote = ch
        elif ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                try:
                    value = ast.literal_eval(text[start:i + 1])
                except (ValueError, SyntaxError, TypeError,
                        MemoryError, RecursionError):
                    return None
                # A brace literal can also be a set; only dicts are useful.
                return value if isinstance(value, dict) else None
        i += 1
    return None


def extract_json_from_codeagent_output(raw_output):
    """Extract a structured dict from raw CodeAgent output text.

    Search order:
      1. Inside each fenced ``py``/``python``/unlabelled code block, a dict
         passed to ``json.dumps(`` (which also covers
         ``print(json.dumps(...))``), then a dict assigned to ``result``.
      2. Anywhere in the raw text, the first non-empty dict that parses.

    Each candidate is parsed as JSON first and as a Python literal second.
    A candidate that fails to parse is skipped rather than aborting the
    search.

    Args:
        raw_output: The text produced by the agent.

    Returns:
        The extracted dict, or ``{"error": "Failed to extract structured
        JSON"}`` when nothing could be extracted (including when
        ``raw_output`` is not a string).
    """
    # Imported locally so this function does not depend on a module-level
    # ``import re`` that may not be present where it is defined.
    import re

    failure = {"error": "Failed to extract structured JSON"}

    if not isinstance(raw_output, str):
        print(f"[extract_json] Error: expected str, got {type(raw_output).__name__}")
        return failure

    code_blocks = re.findall(r"```(?:py|python)?\n(.*?)```", raw_output, re.DOTALL)

    # Each anchor ends right before the opening brace of the candidate dict.
    anchors = (
        r"json\.dumps\(\s*(?=\{)",
        r"result\s*=\s*(?=\{)",
    )
    for block in code_blocks:
        for anchor in anchors:
            for hit in re.finditer(anchor, block):
                parsed = _parse_dict_at(block, hit.end())
                if parsed is not None:
                    return parsed

    # Fallback when no structured pattern is found: try every opening brace
    # in order. Empty dicts are skipped because "{}" is far more likely to be
    # a format placeholder than the intended payload.
    for hit in re.finditer(r"\{", raw_output):
        parsed = _parse_dict_at(raw_output, hit.start())
        if parsed:
            return parsed

    return failure
92
+
93
+ import time, os, shutil, psutil, json
94
+ from pathlib import Path
95
  import wandb
96
 
97
+ # Add a robust JSON extraction function
98
  def extract_json_from_codeagent_output(raw_output):
99
  try:
100
  code_blocks = re.findall(r"```(?:py|python)?\n(.*?)```", raw_output, re.DOTALL)
 
110
  return json.loads(match.group(1))
111
  except json.JSONDecodeError:
112
  return ast.literal_eval(match.group(1))
113
+ # Fallback when no structured JSON pattern is found
114
  fallback = re.search(r"\{[\s\S]+?\}", raw_output)
115
  if fallback:
116
  return json.loads(fallback.group(0))
 
119
  return {"error": "Failed to extract structured JSON"}
120
 
121
  def analyze_data(csv_file, additional_notes=""):
 
 
 
122
  start_time = time.time()
123
  process = psutil.Process(os.getpid())
124
  initial_memory = process.memory_info().rss / 1024 ** 2
 
128
  shutil.rmtree('./figures')
129
  os.makedirs('./figures', exist_ok=True)
130
 
131
+ # Initialize WandB
132
  wandb.login(key=os.environ.get('WANDB_API_KEY'))
133
  run = wandb.init(project="huggingface-data-analysis", config={
134
  "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
 
169
  "error": "Failed to extract structured JSON"
170
  }
171
 
172
+ # Record execution time and memory usage
173
  execution_time = time.time() - start_time
174
  final_memory = process.memory_info().rss / 1024 ** 2
175
  memory_usage = final_memory - initial_memory