AIDataAgentProjectFinal

Paused

App Files Files Community

pavanmutha committed on Apr 18, 2025

Commit

1f71b35

verified ·

1 Parent(s): 69a2d3a

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -5

app.py CHANGED Viewed

@@ -68,10 +68,33 @@ def clean_data(df):
 # Extract JSON when the CodeAgent output is not in the expected format
-import os, json, shutil, time, psutil, tempfile, re, ast
-import pandas as pd
 import wandb
 def extract_json_from_codeagent_output(raw_output):
     try:
         code_blocks = re.findall(r"```(?:py|python)?\n(.*?)```", raw_output, re.DOTALL)
@@ -87,6 +110,7 @@ def extract_json_from_codeagent_output(raw_output):
                         return json.loads(match.group(1))
                     except json.JSONDecodeError:
                         return ast.literal_eval(match.group(1))
         fallback = re.search(r"\{[\s\S]+?\}", raw_output)
         if fallback:
             return json.loads(fallback.group(0))
@@ -95,9 +119,6 @@ def extract_json_from_codeagent_output(raw_output):
     return {"error": "Failed to extract structured JSON"}
 def analyze_data(csv_file, additional_notes=""):
-    import time, os, shutil, psutil, json
-    from pathlib import Path
     start_time = time.time()
     process = psutil.Process(os.getpid())
     initial_memory = process.memory_info().rss / 1024 ** 2
@@ -107,6 +128,7 @@ def analyze_data(csv_file, additional_notes=""):
         shutil.rmtree('./figures')
     os.makedirs('./figures', exist_ok=True)
     wandb.login(key=os.environ.get('WANDB_API_KEY'))
     run = wandb.init(project="huggingface-data-analysis", config={
         "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
@@ -147,6 +169,7 @@ def analyze_data(csv_file, additional_notes=""):
         "error": "Failed to extract structured JSON"
     }
     execution_time = time.time() - start_time
     final_memory = process.memory_info().rss / 1024 ** 2
     memory_usage = final_memory - initial_memory

 # Extract JSON when the CodeAgent output is not in the expected format
+def extract_json_from_codeagent_output(raw_output):
+    try:
+        code_blocks = re.findall(r"```(?:py|python)?\n(.*?)```", raw_output, re.DOTALL)
+        for block in code_blocks:
+            for pattern in [
+                r"print\(\s*json\.dumps\(\s*(\{[\s\S]*?\})\s*\)\s*\)",
+                r"json\.dumps\(\s*(\{[\s\S]*?\})\s*\)",
+                r"result\s*=\s*(\{[\s\S]*?\})"
+            ]:
+                match = re.search(pattern, block, re.DOTALL)
+                if match:
+                    try:
+                        return json.loads(match.group(1))
+                    except json.JSONDecodeError:
+                        return ast.literal_eval(match.group(1))
+        fallback = re.search(r"\{[\s\S]+?\}", raw_output)
+        if fallback:
+            return json.loads(fallback.group(0))
+    except Exception as e:
+        print(f"[extract_json] Error: {e}")
+    return {"error": "Failed to extract structured JSON"}
+import time, os, shutil, psutil, json
+from pathlib import Path
 import wandb
+# Add a robust JSON extraction function
 def extract_json_from_codeagent_output(raw_output):
     try:
         code_blocks = re.findall(r"```(?:py|python)?\n(.*?)```", raw_output, re.DOTALL)
                         return json.loads(match.group(1))
                     except json.JSONDecodeError:
                         return ast.literal_eval(match.group(1))
+        # Fallback when no structured JSON pattern is found
         fallback = re.search(r"\{[\s\S]+?\}", raw_output)
         if fallback:
             return json.loads(fallback.group(0))
     return {"error": "Failed to extract structured JSON"}
 def analyze_data(csv_file, additional_notes=""):
     start_time = time.time()
     process = psutil.Process(os.getpid())
     initial_memory = process.memory_info().rss / 1024 ** 2
         shutil.rmtree('./figures')
     os.makedirs('./figures', exist_ok=True)
+    # Initialize WandB
     wandb.login(key=os.environ.get('WANDB_API_KEY'))
     run = wandb.init(project="huggingface-data-analysis", config={
         "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
         "error": "Failed to extract structured JSON"
     }
+    # Record execution time and memory usage
     execution_time = time.time() - start_time
     final_memory = process.memory_info().rss / 1024 ** 2
     memory_usage = final_memory - initial_memory