VEDAGI1 committed on
Commit
a37596a
·
verified ·
1 Parent(s): acc53dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +240 -7
app.py CHANGED
@@ -158,6 +158,236 @@ def format_validated_json_for_report(validated_data: Dict[str, Any]) -> str:
158
  return json.dumps({"raw_data": str(validated_data)}, indent=2)
159
 
160
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  # ---------------------- Analysis Script Generation ----------------------
162
 
163
  def _create_python_script(user_scenario: str, schema_context: str) -> str:
@@ -293,14 +523,17 @@ def handle(user_msg: str, files: list, yield_update) -> str:
293
  yield_update("```\n🧠 Generating aligned analysis script...\n```")
294
  analysis_script = _create_python_script(prompt_for_code, schema_context)
295
 
296
- yield_update("```\n⚙️ Executing script to extract raw data...\n```")
297
- execution_namespace = {"dfs": dataframes, "pd": pd, "re": re, "json": json}
298
- output_buffer = io.StringIO()
299
-
300
  try:
301
- with redirect_stdout(output_buffer):
302
- exec(analysis_script, execution_namespace)
303
- raw_data_output = output_buffer.getvalue()
 
 
 
 
 
 
304
  except Exception as e:
305
  return (
306
  f"An error occurred executing the script: {e}\n\nGenerated Script:\n"
 
158
  return json.dumps({"raw_data": str(validated_data)}, indent=2)
159
 
160
 
161
+ # ---------------------- Sandbox Execution ----------------------
162
+
163
class SandboxViolationError(Exception):
    """Raised when generated code attempts forbidden operations."""


# Import policy for sandboxed scripts, consulted by _safe_import below.
# A request is accepted when either its top-level package or its full dotted
# name appears here (e.g. "scipy.stats" is listed, plain "scipy" is not).
_ALLOWED_MODULES = frozenset({
    "json", "math", "statistics", "collections", "itertools", "functools",
    "operator", "string", "re", "datetime", "decimal", "fractions",
    "random", "copy", "types", "typing", "dataclasses", "enum",
    "numpy", "pandas", "scipy.stats",
})

# Explicit deny-list, checked before the allow-list so these names get a
# dedicated error message.
# NOTE(review): "eval", "exec", "compile", "open", "file" and "input" are
# builtin names rather than modules; they are kept so the behaviour of
# `import <name>` for them is unchanged.
_BLOCKED_MODULES = frozenset({
    "os", "sys", "subprocess", "shutil", "pathlib", "glob",
    "socket", "http", "urllib", "requests", "ftplib", "smtplib",
    "pickle", "shelve", "marshal", "importlib", "builtins",
    "ctypes", "multiprocessing", "threading", "asyncio",
    "eval", "exec", "compile", "open", "file", "input",
    "code", "codeop", "pty", "tty", "termios", "resource",
    "signal", "mmap", "sysconfig", "platform",
})


def _safe_import(name: str, globals_dict=None, locals_dict=None, fromlist=(), level=0):
    """
    Restricted replacement for ``__import__`` that only allows whitelisted modules.

    The parameters mirror the built-in ``__import__`` signature so this
    function can be installed as ``__import__`` in a sandbox builtins dict.

    Args:
        name: Dotted module name requested by the ``import`` statement.
        globals_dict: Forwarded unchanged to the real ``__import__``.
        locals_dict: Forwarded unchanged to the real ``__import__``.
        fromlist: Forwarded unchanged to the real ``__import__``.
        level: Forwarded unchanged to the real ``__import__``.

    Returns:
        Whatever the real ``__import__`` returns for an allowed module.

    Raises:
        SandboxViolationError: If the module (or its top-level package) is
            deny-listed, or is not on the allow-list.
    """
    # Resolve the real importer through the `builtins` module. The global
    # `__builtins__` is a dict in imported modules but the module object when
    # the file runs as `__main__`, where subscripting it raises TypeError.
    import builtins

    base_module = name.split('.')[0]

    if base_module in _BLOCKED_MODULES or name in _BLOCKED_MODULES:
        raise SandboxViolationError(f"Import of '{name}' is not allowed in sandbox environment.")

    if base_module not in _ALLOWED_MODULES and name not in _ALLOWED_MODULES:
        raise SandboxViolationError(f"Import of '{name}' is not allowed. Allowed modules: {', '.join(sorted(_ALLOWED_MODULES))}")

    return builtins.__import__(name, globals_dict, locals_dict, fromlist, level)
198
+
199
+
200
def _create_sandbox_builtins() -> Dict[str, Any]:
    """
    Build the ``__builtins__`` mapping handed to sandboxed scripts.

    Only the names listed below are visible to the script; anything else
    (``open``, ``eval``, ``exec``, ``compile``, ``input``, ...) raises
    ``NameError`` inside the sandbox. ``__import__`` is replaced by
    ``_safe_import`` and ``globals`` by a stub that returns an empty dict.

    Returns:
        A fresh dict mapping builtin names to the objects exposed to scripts.
    """
    import builtins

    # NOTE(review): getattr/setattr/delattr/type/object/vars/dir/locals are
    # exposed, so a script can reach attributes by computed name, which the
    # text-level pattern checks cannot see. Treat this as a guard rail, not a
    # hard security boundary; confirm the threat model before relying on it.
    exposed_names = (
        # Types and constructors
        "bool", "int", "float", "str", "list", "dict", "tuple", "set",
        "frozenset", "bytes", "bytearray", "complex", "slice", "type", "object",
        # Iteration and sequences
        "range", "enumerate", "zip", "map", "filter", "reversed", "sorted",
        "iter", "next", "len",
        # Math and comparison
        "abs", "min", "max", "sum", "pow", "round", "divmod",
        # Logic and identity
        "all", "any", "isinstance", "issubclass", "id", "hash",
        # String and representation
        "repr", "ascii", "chr", "ord", "format", "print",
        # Attribute access
        "getattr", "setattr", "hasattr", "delattr",
        # Other operations
        "callable", "dir", "vars", "locals",
        # Exceptions (needed for error handling in scripts)
        "Exception", "ValueError", "TypeError", "KeyError", "IndexError",
        "AttributeError", "ZeroDivisionError", "StopIteration", "RuntimeError",
        # Further exception types scripts commonly name in `except` clauses;
        # without them the `except` line itself fails with NameError.
        "ArithmeticError", "AssertionError", "ImportError", "LookupError",
        "NameError", "NotImplementedError", "OverflowError",
        # Required by the `class` statement; without it any class definition
        # in a script fails, which would make the whitelisted `dataclasses`
        # and `enum` modules unusable.
        "__build_class__",
    )

    safe_builtins: Dict[str, Any] = {
        name: getattr(builtins, name) for name in exposed_names
    }

    safe_builtins.update({
        # Return empty dict to prevent access to real globals
        "globals": lambda: {},

        # Constants
        "None": None,
        "True": True,
        "False": False,
        "Ellipsis": builtins.Ellipsis,
        "NotImplemented": builtins.NotImplemented,

        # Restricted import
        "__import__": _safe_import,
        "__name__": "__sandbox__",
        "__doc__": None,
    })

    return safe_builtins
301
+
302
+
303
def _create_sandbox_namespace(dataframes: List[Any]) -> Dict[str, Any]:
    """
    Assemble the globals dict in which a generated analysis script runs.

    The namespace carries the restricted ``__builtins__`` mapping from
    ``_create_sandbox_builtins`` plus a fixed set of pre-bound names, so
    scripts can use them without importing anything:

    - ``dfs``: the ``dataframes`` argument, passed through as-is
    - ``pd``, ``np``, ``re``, ``json``: the pre-loaded modules
    - ``DataFrame``, ``Series``, ``NaN``, ``nan``: convenience aliases

    Args:
        dataframes: Objects exposed to the script under the name ``dfs``.

    Returns:
        A new dict suitable as the globals argument of ``exec``.
    """
    import numpy as np

    sandbox_env: Dict[str, Any] = {"__builtins__": _create_sandbox_builtins()}

    # Data under analysis and the pre-loaded modules
    sandbox_env.update(dfs=dataframes, pd=pd, np=np, re=re, json=json)

    # Common pandas/numpy items for convenience
    sandbox_env.update(
        DataFrame=pd.DataFrame,
        Series=pd.Series,
        NaN=np.nan,
        nan=np.nan,
    )

    return sandbox_env
333
+
334
+
335
def execute_in_sandbox(script: str, dataframes: List[Any]) -> str:
    """
    Executes the analysis script in a sandboxed environment.

    The script text is first scanned against a list of forbidden patterns;
    if none match, it is executed with the namespace built by
    ``_create_sandbox_namespace`` while stdout is captured.

    Args:
        script: Python source code of the generated analysis script.
        dataframes: Objects made available to the script as ``dfs``.

    Returns:
        The captured stdout output.

    Raises:
        SandboxViolationError: If the script text matches a forbidden
            pattern, or the script triggers one during execution (for
            example through a rejected import).
        RuntimeError: For any other exception raised by the script; the
            original exception is attached as ``__cause__``.
    """
    # Pre-execution safety checks on the script text.
    # NOTE(review): these are plain-text heuristics; they also match inside
    # string literals and comments, and do not see dynamically built names.
    forbidden_patterns = [
        (r'\bopen\s*\(', "File operations (open) are not allowed"),
        (r'\bos\s*\.', "OS module access is not allowed"),
        (r'\bsys\s*\.', "Sys module access is not allowed"),
        (r'\bsubprocess', "Subprocess module is not allowed"),
        (r'\bsocket\s*\.', "Network operations are not allowed"),
        (r'\burllib', "Network operations are not allowed"),
        (r'\brequests\s*\.', "Network operations are not allowed"),
        (r'\bhttp\s*\.', "Network operations are not allowed"),
        (r'\beval\s*\(', "eval() is not allowed"),
        (r'\bexec\s*\(', "exec() is not allowed"),
        (r'\bcompile\s*\(', "compile() is not allowed"),
        (r'\b__import__\s*\(', "Direct __import__ calls are not allowed"),
        (r'\bimportlib', "importlib is not allowed"),
        (r'\bpickle', "pickle module is not allowed"),
        (r'\bshutil', "shutil module is not allowed"),
        (r'\bglobals\s*\(\s*\)', "globals() access is restricted"),
        (r'\.to_csv\s*\(', "Writing files (to_csv) is not allowed"),
        (r'\.to_excel\s*\(', "Writing files (to_excel) is not allowed"),
        (r'\.to_parquet\s*\(', "Writing files (to_parquet) is not allowed"),
        (r'\.to_sql\s*\(', "Database operations (to_sql) are not allowed"),
        (r'pd\.read_', "Reading files is not allowed - use the provided dfs variable"),
    ]

    for pattern, message in forbidden_patterns:
        if re.search(pattern, script):
            raise SandboxViolationError(f"Security violation: {message}")

    # Create sandboxed namespace
    namespace = _create_sandbox_namespace(dataframes)

    # Capture stdout
    output_buffer = io.StringIO()

    try:
        with redirect_stdout(output_buffer):
            # The same dict serves as globals and locals so that top-level
            # names defined by the script are visible inside its functions.
            exec(script, namespace, namespace)
    except SandboxViolationError:
        # Policy violations keep their own type so callers can tell them apart.
        raise
    except Exception as e:
        # Wrap with context; chain the original so its traceback is kept.
        raise RuntimeError(f"Script execution error: {type(e).__name__}: {e}") from e

    return output_buffer.getvalue()
389
+
390
+
391
  # ---------------------- Analysis Script Generation ----------------------
392
 
393
  def _create_python_script(user_scenario: str, schema_context: str) -> str:
 
523
  yield_update("```\n🧠 Generating aligned analysis script...\n```")
524
  analysis_script = _create_python_script(prompt_for_code, schema_context)
525
 
526
+ yield_update("```\n⚙️ Executing script in sandbox...\n```")
 
 
 
527
  try:
528
+ raw_data_output = execute_in_sandbox(analysis_script, dataframes)
529
+ except SandboxViolationError as e:
530
+ safe_log("sandbox_violation", {"error": str(e)})
531
+ return (
532
+ f"**Security Violation Detected**\n\n{e}\n\n"
533
+ f"The generated script attempted a forbidden operation. "
534
+ f"Please rephrase your request.\n\n"
535
+ f"Generated Script:\n```python\n{analysis_script}\n```"
536
+ )
537
  except Exception as e:
538
  return (
539
  f"An error occurred executing the script: {e}\n\nGenerated Script:\n"