File size: 2,674 Bytes
96638b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
from typing import Dict, Any, Optional, Union
import pandas as pd
import numpy as np
from src.data.loader import DataLoader
import traceback

class QueryExecutor:
    """
    Executes generated pandas code against the loaded datasets.

    Each call to :meth:`execute` runs the code in-process via ``exec`` with a
    fresh namespace exposing ``df1``, ``df2``, ``pd`` and ``np``. Both frames
    are available to the query, so any joining is done by the query itself.

    NOTE(review): this is namespace isolation only, NOT a security sandbox.
    ``exec`` injects the full ``__builtins__`` into the namespace, so the code
    can import modules, open files, etc. Do not feed it untrusted input
    without an additional isolation layer.

    Attributes:
        df1: Primary health metrics dataset
        df2: Physical activity dataset
    """

    def __init__(self) -> None:
        self.df1: pd.DataFrame
        self.df2: pd.DataFrame
        self.df1, self.df2 = DataLoader.load_datasets()

    def execute(self, query_code: str) -> Dict[str, Any]:
        """
        Execute the provided Python/pandas code and collect its ``result``.

        The code is expected to assign its answer to a variable named
        ``result``; if it never does, the returned result is ``None``.

        Args:
            query_code: Python/Pandas code string to execute

        Returns:
            Dictionary containing:
                - success: Whether execution succeeded
                - result: Query result (DataFrame, Series, or scalar)
                - error: ``"<ExceptionType>: <message>"`` if failed, None otherwise
                - traceback: Full traceback text (present only when the
                  executed code raised)
        """
        if not query_code:
            return {"success": False, "result": None, "error": "No query code provided"}

        # One dict serves as BOTH globals and locals. With separate dicts,
        # exec() runs the code as if it were a class body: names bound at the
        # top level are invisible inside lambdas, nested functions and (before
        # Python 3.12) comprehensions, so e.g.
        # ``df1.apply(lambda r: np.sqrt(r))`` would raise NameError.
        #
        # NOTE: the DataFrames are passed by reference, so in-place mutations
        # performed by the query persist on this executor.
        namespace: Dict[str, Any] = {
            "df1": self.df1,
            "df2": self.df2,
            "pd": pd,
            "np": np,
            "result": None,
        }

        try:
            exec(query_code, namespace)

            # Post-processing for serialization/display
            processed_result = self._process_result(namespace.get("result"))

            return {
                "success": True,
                "result": processed_result,
                "error": None,
            }

        except Exception as e:
            # Boundary handler: any failure in the generated code (including
            # SyntaxError) is reported to the caller instead of propagating.
            return {
                "success": False,
                "result": None,
                "error": f"{type(e).__name__}: {str(e)}",
                "traceback": traceback.format_exc(),
            }

    def _process_result(self, result: Any) -> Union[pd.DataFrame, pd.Series, Any]:
        """
        Hook for formatting the raw result for downstream consumption.

        Currently a pass-through: pandas objects and every other value are
        returned unchanged so callers can display or reason over them directly.

        Args:
            result: Raw execution result

        Returns:
            The same object that was passed in
        """
        return result