Spaces:

VEDAGI1
/

Medica_DecisionSupportAI

Sleeping

App Files Files Community

VEDAGI1 committed 5 days ago

Commit

a37596a

verified ·

1 Parent(s): acc53dd

Update app.py

Browse files

Files changed (1) hide show

app.py +240 -7

app.py CHANGED Viewed

@@ -158,6 +158,236 @@ def format_validated_json_for_report(validated_data: Dict[str, Any]) -> str:
         return json.dumps({"raw_data": str(validated_data)}, indent=2)
 # ---------------------- Analysis Script Generation ----------------------
 def _create_python_script(user_scenario: str, schema_context: str) -> str:
@@ -293,14 +523,17 @@ def handle(user_msg: str, files: list, yield_update) -> str:
             yield_update("```\n🧠 Generating aligned analysis script...\n```")
             analysis_script = _create_python_script(prompt_for_code, schema_context)
-            yield_update("```\n⚙️ Executing script to extract raw data...\n```")
-            execution_namespace = {"dfs": dataframes, "pd": pd, "re": re, "json": json}
-            output_buffer = io.StringIO()
             try:
-                with redirect_stdout(output_buffer):
-                    exec(analysis_script, execution_namespace)
-                raw_data_output = output_buffer.getvalue()
             except Exception as e:
                 return (
                     f"An error occurred executing the script: {e}\n\nGenerated Script:\n"

         return json.dumps({"raw_data": str(validated_data)}, indent=2)
+# ---------------------- Sandbox Execution ----------------------
class SandboxViolationError(Exception):
    """Raised when generated code attempts forbidden operations."""


# Modules a generated analysis script may import. Entries are matched either
# against the full dotted name or against its top-level package.
_ALLOWED_MODULES = frozenset({
    "json", "math", "statistics", "collections", "itertools", "functools",
    "operator", "string", "re", "datetime", "decimal", "fractions",
    "random", "copy", "types", "typing", "dataclasses", "enum",
    "numpy", "pandas", "scipy.stats",
})

# Names that are always rejected, checked before the allow-list.
# NOTE(review): several entries ("eval", "exec", "open", ...) are builtins
# rather than modules; they are harmless here but never match a real import.
_BLOCKED_MODULES = frozenset({
    "os", "sys", "subprocess", "shutil", "pathlib", "glob",
    "socket", "http", "urllib", "requests", "ftplib", "smtplib",
    "pickle", "shelve", "marshal", "importlib", "builtins",
    "ctypes", "multiprocessing", "threading", "asyncio",
    "eval", "exec", "compile", "open", "file", "input",
    "code", "codeop", "pty", "tty", "termios", "resource",
    "signal", "mmap", "sysconfig", "platform",
})


def _safe_import(name: str, globals_dict=None, locals_dict=None, fromlist=(), level=0):
    """Restricted replacement for ``__import__`` used inside the sandbox.

    Args:
        name: Dotted module name requested by the ``import`` statement.
        globals_dict: Globals of the importing code (forwarded unchanged).
        locals_dict: Locals of the importing code (forwarded unchanged).
        fromlist: Names listed after ``from <name> import``; empty for a
            plain ``import <name>``.
        level: Relative-import level (forwarded unchanged).

    Returns:
        Whatever the real ``__import__`` returns for an allowed module.

    Raises:
        SandboxViolationError: If the module is block-listed or not on the
            allow-list.
    """
    # Resolve the real importer through the ``builtins`` module. The module
    # global ``__builtins__`` is a dict in imported modules but the module
    # object itself when this file runs as ``__main__``, so subscripting it
    # (``__builtins__["__import__"]``) raises TypeError in the latter case.
    import builtins

    base_module = name.split('.')[0]
    if base_module in _BLOCKED_MODULES or name in _BLOCKED_MODULES:
        raise SandboxViolationError(f"Import of '{name}' is not allowed in sandbox environment.")

    allowed = base_module in _ALLOWED_MODULES or name in _ALLOWED_MODULES
    if not allowed and fromlist:
        # ``from scipy import stats`` arrives as name="scipy",
        # fromlist=("stats",); accept it when every requested submodule is
        # itself allow-listed (e.g. "scipy.stats").
        allowed = all(f"{name}.{item}" in _ALLOWED_MODULES for item in fromlist)
    if not allowed:
        raise SandboxViolationError(f"Import of '{name}' is not allowed. Allowed modules: {', '.join(sorted(_ALLOWED_MODULES))}")

    return builtins.__import__(name, globals_dict, locals_dict, fromlist, level)
def _create_sandbox_builtins() -> Dict[str, Any]:
    """Build the restricted ``__builtins__`` mapping for sandboxed scripts.

    The mapping exposes a fixed set of real builtins useful for data
    analysis, replaces ``globals`` with a stub that returns an empty dict,
    and routes ``import`` statements through ``_safe_import``.

    Returns:
        A new dict suitable for use as ``namespace["__builtins__"]``.

    NOTE(review): ``type``, ``object``, ``getattr``/``setattr`` and friends
    are exposed, so this narrows what scripts can reach by name but should
    not be treated as a hard security boundary on its own.
    """
    import builtins

    exposed_names = (
        # Types and constructors
        "bool", "int", "float", "str", "list", "dict", "tuple", "set",
        "frozenset", "bytes", "bytearray", "complex", "slice", "type",
        "object",
        # Iteration and sequences
        "range", "enumerate", "zip", "map", "filter", "reversed", "sorted",
        "iter", "next", "len",
        # Math and comparison
        "abs", "min", "max", "sum", "pow", "round", "divmod",
        # Logic and identity
        "all", "any", "isinstance", "issubclass", "id", "hash",
        # String and representation
        "repr", "ascii", "chr", "ord", "format", "print",
        # Attribute access
        "getattr", "setattr", "hasattr", "delattr",
        # Other operations
        "callable", "dir", "vars", "locals",
        # Exceptions (needed for error handling in scripts)
        "Exception", "ValueError", "TypeError", "KeyError", "IndexError",
        "AttributeError", "ZeroDivisionError", "StopIteration",
        "RuntimeError",
        # Further exception types scripts commonly catch or raise
        "NameError", "ImportError", "NotImplementedError", "AssertionError",
        "ArithmeticError", "LookupError", "OverflowError",
        # Constants
        "Ellipsis", "NotImplemented",
        # Required by the ``class`` statement; without it any class
        # definition in a script fails with NameError, which would make the
        # allow-listed ``dataclasses`` and ``enum`` modules unusable.
        "__build_class__",
    )
    safe_builtins: Dict[str, Any] = {
        name: getattr(builtins, name) for name in exposed_names
    }
    safe_builtins.update({
        # Return empty dict to prevent access to real globals
        "globals": lambda: {},
        "None": None,
        "True": True,
        "False": False,
        # Restricted import
        "__import__": _safe_import,
        "__name__": "__sandbox__",
        "__doc__": None,
    })
    return safe_builtins
def _create_sandbox_namespace(dataframes: List[Any]) -> Dict[str, Any]:
    """Assemble the globals dict that a generated analysis script runs in.

    The namespace carries the restricted builtins from
    ``_create_sandbox_builtins`` plus a handful of pre-bound names so that
    scripts do not need to import anything for ordinary work:

    - ``dfs``: the ``dataframes`` argument, passed through as-is
    - ``pd``, ``np``, ``re``, ``json``: the already-imported modules
    - ``DataFrame``, ``Series``, ``NaN``, ``nan``: pandas/numpy shortcuts

    The stated intent is in-memory analysis only (no file I/O, network
    access or system calls).
    """
    import numpy as np

    sandbox_globals: Dict[str, Any] = {"__builtins__": _create_sandbox_builtins()}
    # Data and modules made available without an import statement.
    sandbox_globals.update(dfs=dataframes, pd=pd, np=np, re=re, json=json)
    # Convenience aliases for frequently used pandas/numpy objects.
    sandbox_globals.update(
        DataFrame=pd.DataFrame,
        Series=pd.Series,
        NaN=np.nan,
        nan=np.nan,
    )
    return sandbox_globals
def execute_in_sandbox(script: str, dataframes: List[Any]) -> str:
    """Run a generated analysis script in the restricted namespace.

    The script text is first scanned against a list of forbidden regex
    patterns; if none match, it is executed with ``exec`` in the namespace
    built by ``_create_sandbox_namespace`` while stdout is captured.

    Args:
        script: Python source of the generated analysis script.
        dataframes: Objects exposed to the script as ``dfs``.

    Returns:
        Everything the script wrote to stdout.

    Raises:
        SandboxViolationError: If the script text matches a forbidden
            pattern, or the script triggers a rejected import at runtime.
        RuntimeError: For any other exception raised by the script; the
            original exception is attached as ``__cause__``.

    NOTE(review): the scan is a textual pre-filter, so it also matches inside
    comments and string literals, and it cannot see dynamically built names.
    Combined with ``exec`` on generated code this is best-effort containment,
    not a hard isolation boundary.
    """
    # Pre-execution safety checks on the script text
    forbidden_patterns = [
        (r'\bopen\s*\(', "File operations (open) are not allowed"),
        (r'\bos\s*\.', "OS module access is not allowed"),
        (r'\bsys\s*\.', "Sys module access is not allowed"),
        (r'\bsubprocess', "Subprocess module is not allowed"),
        (r'\bsocket\s*\.', "Network operations are not allowed"),
        (r'\burllib', "Network operations are not allowed"),
        (r'\brequests\s*\.', "Network operations are not allowed"),
        (r'\bhttp\s*\.', "Network operations are not allowed"),
        (r'\beval\s*\(', "eval() is not allowed"),
        (r'\bexec\s*\(', "exec() is not allowed"),
        (r'\bcompile\s*\(', "compile() is not allowed"),
        (r'\b__import__\s*\(', "Direct __import__ calls are not allowed"),
        (r'\bimportlib', "importlib is not allowed"),
        (r'\bpickle', "pickle module is not allowed"),
        (r'\bshutil', "shutil module is not allowed"),
        (r'\bglobals\s*\(\s*\)', "globals() access is restricted"),
        (r'\.to_csv\s*\(', "Writing files (to_csv) is not allowed"),
        (r'\.to_excel\s*\(', "Writing files (to_excel) is not allowed"),
        (r'\.to_parquet\s*\(', "Writing files (to_parquet) is not allowed"),
        # ``\bpickle`` above does not match ``.to_pickle(`` because ``_`` is
        # a word character, so the pickle writer needs its own pattern.
        (r'\.to_pickle\s*\(', "Writing files (to_pickle) is not allowed"),
        (r'\.to_sql\s*\(', "Database operations (to_sql) are not allowed"),
        (r'pd\.read_', "Reading files is not allowed - use the provided dfs variable"),
    ]
    for pattern, message in forbidden_patterns:
        if re.search(pattern, script):
            raise SandboxViolationError(f"Security violation: {message}")
    # Create sandboxed namespace
    namespace = _create_sandbox_namespace(dataframes)
    # Capture stdout
    output_buffer = io.StringIO()
    try:
        with redirect_stdout(output_buffer):
            # The same dict serves as globals and locals so that functions
            # defined by the script can see its top-level names.
            exec(script, namespace, namespace)
        return output_buffer.getvalue()
    except SandboxViolationError:
        # Let sandbox violations through untouched so callers can tell them
        # apart from ordinary script failures.
        raise
    except Exception as e:
        # Re-raise with a uniform message; chain the cause for debugging.
        raise RuntimeError(f"Script execution error: {type(e).__name__}: {e}") from e
 # ---------------------- Analysis Script Generation ----------------------
 def _create_python_script(user_scenario: str, schema_context: str) -> str:
             yield_update("```\n🧠 Generating aligned analysis script...\n```")
             analysis_script = _create_python_script(prompt_for_code, schema_context)
+            yield_update("```\n⚙️ Executing script in sandbox...\n```")
             try:
+                raw_data_output = execute_in_sandbox(analysis_script, dataframes)
+            except SandboxViolationError as e:
+                safe_log("sandbox_violation", {"error": str(e)})
+                return (
+                    f"**Security Violation Detected**\n\n{e}\n\n"
+                    f"The generated script attempted a forbidden operation. "
+                    f"Please rephrase your request.\n\n"
+                    f"Generated Script:\n```python\n{analysis_script}\n```"
+                )
             except Exception as e:
                 return (
                     f"An error occurred executing the script: {e}\n\nGenerated Script:\n"