SolshineMisfit committed on
Commit
116bda5
·
verified ·
1 Parent(s): 4bd6f35

literal instead of eval

Browse files
Files changed (1) hide show
  1. app.py +31 -23
app.py CHANGED
@@ -57,14 +57,17 @@ def Sonar_Web_Search_Tool(arg1: str, arg2: str) -> str:
57
 
58
  def parse_json(text: str):
59
  """
60
- A minimal JSON parser workaround using eval.
61
- Replaces 'true', 'false', and 'null' with their Python equivalents.
62
- WARNING: Use only with trusted input.
63
  """
64
- safe_text = text.replace("true", "True").replace("false", "False").replace("null", "None")
 
 
65
  try:
66
- return eval(safe_text, {"__builtins__": None}, {})
67
- except Exception as e:
 
68
  raise ValueError(f"Failed to parse JSON: {str(e)}")
69
 
70
  def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
@@ -82,7 +85,7 @@ def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
82
  """
83
  log_text = ""
84
  try:
85
- # Required imports (json replaced by our parse_json)
86
  import pandas as pd
87
  from datasets import Dataset, DatasetDict
88
  from huggingface_hub import HfApi
@@ -113,22 +116,27 @@ def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
113
  # Process input data
114
  created_ds = None
115
  try:
116
- # Use parse_json instead of json.loads
117
- json_data = parse_json(conversation_data)
118
- if isinstance(json_data, list) and all(isinstance(item, dict) for item in json_data):
119
- log_text += f"Processing JSON array with {len(json_data)} items\n"
120
- df = pd.DataFrame(json_data)
121
- ds = Dataset.from_pandas(df)
122
- created_ds = DatasetDict({"train": ds})
123
- elif isinstance(json_data, dict):
124
- log_text += "Processing single JSON object\n"
125
- df = pd.DataFrame([json_data])
126
- ds = Dataset.from_pandas(df)
127
- created_ds = DatasetDict({"train": ds})
128
- else:
129
- raise ValueError("JSON not recognized as array or single object")
130
- except Exception as e:
131
- log_text += f"Not processing as JSON: {str(e)}\n"
 
 
 
 
 
132
  # Try pipe-separated format
133
  lines = conversation_data.strip().split('\n')
134
  if '|' in conversation_data and len(lines) > 1:
 
57
 
58
  def parse_json(text: str):
59
  """
60
+ A safer JSON parser using ast.literal_eval.
61
+ Converts JSON-like strings to Python objects without executing code.
62
+ Handles common JSON literals (true, false, null) by converting them to Python equivalents.
63
  """
64
+ # Replace JSON literals with Python equivalents
65
+ prepared_text = text.replace("true", "True").replace("false", "False").replace("null", "None")
66
+
67
  try:
68
+ import ast
69
+ return ast.literal_eval(prepared_text)
70
+ except (SyntaxError, ValueError) as e:
71
  raise ValueError(f"Failed to parse JSON: {str(e)}")
72
 
73
  def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
 
85
  """
86
  log_text = ""
87
  try:
88
+ # Required imports
89
  import pandas as pd
90
  from datasets import Dataset, DatasetDict
91
  from huggingface_hub import HfApi
 
116
  # Process input data
117
  created_ds = None
118
  try:
119
+ # Try parsing as JSON using the safer parse_json function
120
+ try:
121
+ json_data = parse_json(conversation_data)
122
+
123
+ # Process based on data structure
124
+ if isinstance(json_data, list) and all(isinstance(item, dict) for item in json_data):
125
+ log_text += f"Processing JSON array with {len(json_data)} items\n"
126
+ df = pd.DataFrame(json_data)
127
+ ds = Dataset.from_pandas(df)
128
+ created_ds = DatasetDict({"train": ds})
129
+ elif isinstance(json_data, dict):
130
+ log_text += "Processing single JSON object\n"
131
+ df = pd.DataFrame([json_data])
132
+ ds = Dataset.from_pandas(df)
133
+ created_ds = DatasetDict({"train": ds})
134
+ else:
135
+ raise ValueError("JSON not recognized as array or single object")
136
+ except Exception as json_error:
137
+ log_text += f"Not processing as JSON: {str(json_error)}\n"
138
+ raise json_error # Propagate to next handler
139
+ except Exception:
140
  # Try pipe-separated format
141
  lines = conversation_data.strip().split('\n')
142
  if '|' in conversation_data and len(lines) > 1: