SolshineMisfit commited on
Commit
4bd6f35
·
verified ·
1 Parent(s): 9e2fccb

Had to use eval for json parsing because the json module is forbidden in this env

Browse files
Files changed (1) hide show
  1. app.py +43 -29
app.py CHANGED
@@ -55,6 +55,18 @@ def Sonar_Web_Search_Tool(arg1: str, arg2: str) -> str:
55
  return f"Error using Sonar Websearch tool '{arg1} {arg2}': {str(e)}"
56
 
57
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
59
  """Creates and pushes a dataset to Hugging Face with the conversation history.
60
 
@@ -66,11 +78,11 @@ def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
66
  - Plain text (stored in a single 'text' column)
67
 
68
  Returns:
69
- URL of the created dataset or error message
70
  """
 
71
  try:
72
- # Required imports
73
- import json
74
  import pandas as pd
75
  from datasets import Dataset, DatasetDict
76
  from huggingface_hub import HfApi
@@ -85,41 +97,42 @@ def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
85
  safe_dataset_name = dataset_name.replace(" ", "_").lower()
86
  repo_id = f"{username}/{safe_dataset_name}"
87
 
88
- print(f"Creating dataset: {repo_id}")
89
 
90
  # Ensure repository exists
91
  hf_api = HfApi(token=api_key)
92
  try:
93
  if not hf_api.repo_exists(repo_id=repo_id, repo_type="dataset"):
94
  hf_api.create_repo(repo_id=repo_id, repo_type="dataset")
95
- print(f"Created repository: {repo_id}")
96
  else:
97
- print(f"Repository already exists: {repo_id}")
98
  except Exception as e:
99
- print(f"Note when checking/creating repository: {str(e)}")
100
 
101
  # Process input data
102
  created_ds = None
103
  try:
104
- json_data = json.loads(conversation_data)
 
105
  if isinstance(json_data, list) and all(isinstance(item, dict) for item in json_data):
106
- print(f"Processing JSON array with {len(json_data)} items")
107
  df = pd.DataFrame(json_data)
108
  ds = Dataset.from_pandas(df)
109
  created_ds = DatasetDict({"train": ds})
110
  elif isinstance(json_data, dict):
111
- print("Processing single JSON object")
112
  df = pd.DataFrame([json_data])
113
  ds = Dataset.from_pandas(df)
114
  created_ds = DatasetDict({"train": ds})
115
  else:
116
  raise ValueError("JSON not recognized as array or single object")
117
- except (json.JSONDecodeError, ValueError) as e:
118
- print(f"Not processing as JSON: {str(e)}")
119
  # Try pipe-separated format
120
  lines = conversation_data.strip().split('\n')
121
  if '|' in conversation_data and len(lines) > 1:
122
- print("Processing as pipe-separated data")
123
  headers = [h.strip() for h in lines[0].split('|')]
124
  data = []
125
  for i, line in enumerate(lines[1:], 1):
@@ -129,7 +142,7 @@ def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
129
  if len(values) == len(headers):
130
  data.append(dict(zip(headers, values)))
131
  else:
132
- print(f"Warning: Skipping row {i} (column count mismatch)")
133
  if data:
134
  df = pd.DataFrame(data)
135
  ds = Dataset.from_pandas(df)
@@ -138,11 +151,11 @@ def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
138
  created_ds = DatasetDict({"train": Dataset.from_dict({"text": [conversation_data]})})
139
  else:
140
  # Fallback for plain text
141
- print("Processing as plain text")
142
  created_ds = DatasetDict({"train": Dataset.from_dict({"text": [conversation_data]})})
143
 
144
  # Push using the DatasetDict push_to_hub method.
145
- print(f"Pushing dataset to {repo_id}")
146
  created_ds.push_to_hub(
147
  repo_id=repo_id,
148
  token=api_key,
@@ -150,13 +163,13 @@ def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
150
  )
151
 
152
  dataset_url = f"https://huggingface.co/datasets/{repo_id}"
153
- print(f"Dataset successfully pushed to: {dataset_url}")
154
- return f"Successfully created dataset at {dataset_url}"
155
  except Exception as e:
156
  import traceback
157
  error_trace = traceback.format_exc()
158
- print(f"Dataset creation error: {str(e)}\n{error_trace}")
159
- return f"Error creating dataset: {str(e)}\n\nTroubleshooting tips:\n1. Verify your HF_API_KEY is valid\n2. Use a simpler dataset name (letters and underscores only)"
160
 
161
  @tool
162
  def Dataset_Creator_Tool(dataset_name: str, conversation_data: str) -> str:
@@ -164,22 +177,23 @@ def Dataset_Creator_Tool(dataset_name: str, conversation_data: str) -> str:
164
 
165
  Args:
166
  dataset_name: Name for the dataset (will be prefixed with 'Misfits-and-Machines/')
167
- conversation_data: Data content to save in the dataset. Can be formatted in three ways:
168
- 1. JSON array of objects - Each object becomes a row in the dataset with its properties as columns:
169
  Example: [{"name": "Product A", "brand": "Company X"}, {"name": "Product B", "brand": "Company Y"}]
170
- 2. Pipe-separated values - First row as headers, subsequent rows as values:
171
  Example: "name | brand\nProduct A | Company X\nProduct B | Company Y"
172
- 3. Plain text - Will be stored in a single 'text' column
173
 
174
  Returns:
175
- Link to the created dataset or error message with troubleshooting steps
176
  """
177
  try:
178
- print(f"Creating dataset '{dataset_name}' with {len(conversation_data)} characters of data")
179
- print(f"Dataset will be created at Misfits-and-Machines/{dataset_name.replace(' ', '_').lower()}")
 
180
  result = Dataset_Creator_Function(dataset_name, conversation_data)
181
- print(f"Dataset creation result: {result}")
182
- return result
183
  except Exception as e:
184
  import traceback
185
  error_trace = traceback.format_exc()
 
55
  return f"Error using Sonar Websearch tool '{arg1} {arg2}': {str(e)}"
56
 
57
 
58
def parse_json(text: str):
    """Parse a JSON document without the (forbidden) ``json`` module.

    Rewrites the JSON keyword literals ``true``/``false``/``null`` to their
    Python equivalents *only where they appear outside string values*, then
    evaluates the result with :func:`ast.literal_eval`. Unlike ``eval``,
    ``literal_eval`` only accepts literal syntax (dicts, lists, strings,
    numbers, booleans, ``None``) and cannot execute arbitrary code, so this
    is safe on untrusted input.

    Args:
        text: A JSON document (object, array, or scalar literal).

    Returns:
        The parsed Python object (dict, list, str, int, float, bool, None).

    Raises:
        ValueError: If ``text`` is not valid JSON-like literal syntax.
    """
    import ast
    import re

    def _pythonize(chunk: str) -> str:
        # Only whole-word keyword tokens; never touches quoted strings
        # because callers pass only the text *between* string literals.
        chunk = re.sub(r"\btrue\b", "True", chunk)
        chunk = re.sub(r"\bfalse\b", "False", chunk)
        return re.sub(r"\bnull\b", "None", chunk)

    # Split the document on double-quoted strings (escaped quotes handled)
    # so keyword replacement cannot corrupt string *values* such as "untrue".
    string_re = re.compile(r'"(?:[^"\\]|\\.)*"')
    pieces = []
    pos = 0
    for match in string_re.finditer(text):
        pieces.append(_pythonize(text[pos:match.start()]))
        pieces.append(match.group(0))  # keep string literals byte-identical
        pos = match.end()
    pieces.append(_pythonize(text[pos:]))

    try:
        return ast.literal_eval("".join(pieces))
    except (ValueError, SyntaxError, MemoryError, RecursionError) as e:
        # Preserve the original contract: all parse failures -> ValueError.
        raise ValueError(f"Failed to parse JSON: {str(e)}")
69
+
70
  def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
71
  """Creates and pushes a dataset to Hugging Face with the conversation history.
72
 
 
78
  - Plain text (stored in a single 'text' column)
79
 
80
  Returns:
81
+ URL of the created dataset or error message along with the log output.
82
  """
83
+ log_text = ""
84
  try:
85
+ # Required imports (json replaced by our parse_json)
 
86
  import pandas as pd
87
  from datasets import Dataset, DatasetDict
88
  from huggingface_hub import HfApi
 
97
  safe_dataset_name = dataset_name.replace(" ", "_").lower()
98
  repo_id = f"{username}/{safe_dataset_name}"
99
 
100
+ log_text += f"Creating dataset: {repo_id}\n"
101
 
102
  # Ensure repository exists
103
  hf_api = HfApi(token=api_key)
104
  try:
105
  if not hf_api.repo_exists(repo_id=repo_id, repo_type="dataset"):
106
  hf_api.create_repo(repo_id=repo_id, repo_type="dataset")
107
+ log_text += f"Created repository: {repo_id}\n"
108
  else:
109
+ log_text += f"Repository already exists: {repo_id}\n"
110
  except Exception as e:
111
+ log_text += f"Note when checking/creating repository: {str(e)}\n"
112
 
113
  # Process input data
114
  created_ds = None
115
  try:
116
+ # Use parse_json instead of json.loads
117
+ json_data = parse_json(conversation_data)
118
  if isinstance(json_data, list) and all(isinstance(item, dict) for item in json_data):
119
+ log_text += f"Processing JSON array with {len(json_data)} items\n"
120
  df = pd.DataFrame(json_data)
121
  ds = Dataset.from_pandas(df)
122
  created_ds = DatasetDict({"train": ds})
123
  elif isinstance(json_data, dict):
124
+ log_text += "Processing single JSON object\n"
125
  df = pd.DataFrame([json_data])
126
  ds = Dataset.from_pandas(df)
127
  created_ds = DatasetDict({"train": ds})
128
  else:
129
  raise ValueError("JSON not recognized as array or single object")
130
+ except Exception as e:
131
+ log_text += f"Not processing as JSON: {str(e)}\n"
132
  # Try pipe-separated format
133
  lines = conversation_data.strip().split('\n')
134
  if '|' in conversation_data and len(lines) > 1:
135
+ log_text += "Processing as pipe-separated data\n"
136
  headers = [h.strip() for h in lines[0].split('|')]
137
  data = []
138
  for i, line in enumerate(lines[1:], 1):
 
142
  if len(values) == len(headers):
143
  data.append(dict(zip(headers, values)))
144
  else:
145
+ log_text += f"Warning: Skipping row {i} (column count mismatch)\n"
146
  if data:
147
  df = pd.DataFrame(data)
148
  ds = Dataset.from_pandas(df)
 
151
  created_ds = DatasetDict({"train": Dataset.from_dict({"text": [conversation_data]})})
152
  else:
153
  # Fallback for plain text
154
+ log_text += "Processing as plain text\n"
155
  created_ds = DatasetDict({"train": Dataset.from_dict({"text": [conversation_data]})})
156
 
157
  # Push using the DatasetDict push_to_hub method.
158
+ log_text += f"Pushing dataset to {repo_id}\n"
159
  created_ds.push_to_hub(
160
  repo_id=repo_id,
161
  token=api_key,
 
163
  )
164
 
165
  dataset_url = f"https://huggingface.co/datasets/{repo_id}"
166
+ log_text += f"Dataset successfully pushed to: {dataset_url}\n"
167
+ return f"Successfully created dataset at {dataset_url}\nLogs:\n{log_text}"
168
  except Exception as e:
169
  import traceback
170
  error_trace = traceback.format_exc()
171
+ log_text += f"Dataset creation error: {str(e)}\n{error_trace}\n"
172
+ return f"Error creating dataset: {str(e)}\nLogs:\n{log_text}"
173
 
174
  @tool
175
  def Dataset_Creator_Tool(dataset_name: str, conversation_data: str) -> str:
 
177
 
178
  Args:
179
  dataset_name: Name for the dataset (will be prefixed with 'Misfits-and-Machines/')
180
+ conversation_data: Data content to save in the dataset. Formats supported:
181
+ 1. JSON array of objects Each object becomes a row (keys as columns).
182
  Example: [{"name": "Product A", "brand": "Company X"}, {"name": "Product B", "brand": "Company Y"}]
183
+ 2. Pipe-separated values First row as headers, remaining rows as values.
184
  Example: "name | brand\nProduct A | Company X\nProduct B | Company Y"
185
+ 3. Plain text Stored in a single 'text' column.
186
 
187
  Returns:
188
+ A link to the created dataset on the Hugging Face Hub or an error message, along with log details.
189
  """
190
  try:
191
+ log_text = ""
192
+ log_text += f"Creating dataset '{dataset_name}' with {len(conversation_data)} characters of data\n"
193
+ log_text += f"Dataset will be created at Misfits-and-Machines/{dataset_name.replace(' ', '_').lower()}\n"
194
  result = Dataset_Creator_Function(dataset_name, conversation_data)
195
+ log_text += f"Dataset creation result: {result}\n"
196
+ return log_text
197
  except Exception as e:
198
  import traceback
199
  error_trace = traceback.format_exc()