Spaces:
Runtime error
Runtime error
Had to use eval for JSON parsing because the json module is forbidden in this env
Browse files
app.py
CHANGED
|
@@ -55,6 +55,18 @@ def Sonar_Web_Search_Tool(arg1: str, arg2: str) -> str:
|
|
| 55 |
return f"Error using Sonar Websearch tool '{arg1} {arg2}': {str(e)}"
|
| 56 |
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
|
| 59 |
"""Creates and pushes a dataset to Hugging Face with the conversation history.
|
| 60 |
|
|
@@ -66,11 +78,11 @@ def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
|
|
| 66 |
- Plain text (stored in a single 'text' column)
|
| 67 |
|
| 68 |
Returns:
|
| 69 |
-
URL of the created dataset or error message
|
| 70 |
"""
|
|
|
|
| 71 |
try:
|
| 72 |
-
# Required imports
|
| 73 |
-
import json
|
| 74 |
import pandas as pd
|
| 75 |
from datasets import Dataset, DatasetDict
|
| 76 |
from huggingface_hub import HfApi
|
|
@@ -85,41 +97,42 @@ def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
|
|
| 85 |
safe_dataset_name = dataset_name.replace(" ", "_").lower()
|
| 86 |
repo_id = f"{username}/{safe_dataset_name}"
|
| 87 |
|
| 88 |
-
|
| 89 |
|
| 90 |
# Ensure repository exists
|
| 91 |
hf_api = HfApi(token=api_key)
|
| 92 |
try:
|
| 93 |
if not hf_api.repo_exists(repo_id=repo_id, repo_type="dataset"):
|
| 94 |
hf_api.create_repo(repo_id=repo_id, repo_type="dataset")
|
| 95 |
-
|
| 96 |
else:
|
| 97 |
-
|
| 98 |
except Exception as e:
|
| 99 |
-
|
| 100 |
|
| 101 |
# Process input data
|
| 102 |
created_ds = None
|
| 103 |
try:
|
| 104 |
-
|
|
|
|
| 105 |
if isinstance(json_data, list) and all(isinstance(item, dict) for item in json_data):
|
| 106 |
-
|
| 107 |
df = pd.DataFrame(json_data)
|
| 108 |
ds = Dataset.from_pandas(df)
|
| 109 |
created_ds = DatasetDict({"train": ds})
|
| 110 |
elif isinstance(json_data, dict):
|
| 111 |
-
|
| 112 |
df = pd.DataFrame([json_data])
|
| 113 |
ds = Dataset.from_pandas(df)
|
| 114 |
created_ds = DatasetDict({"train": ds})
|
| 115 |
else:
|
| 116 |
raise ValueError("JSON not recognized as array or single object")
|
| 117 |
-
except
|
| 118 |
-
|
| 119 |
# Try pipe-separated format
|
| 120 |
lines = conversation_data.strip().split('\n')
|
| 121 |
if '|' in conversation_data and len(lines) > 1:
|
| 122 |
-
|
| 123 |
headers = [h.strip() for h in lines[0].split('|')]
|
| 124 |
data = []
|
| 125 |
for i, line in enumerate(lines[1:], 1):
|
|
@@ -129,7 +142,7 @@ def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
|
|
| 129 |
if len(values) == len(headers):
|
| 130 |
data.append(dict(zip(headers, values)))
|
| 131 |
else:
|
| 132 |
-
|
| 133 |
if data:
|
| 134 |
df = pd.DataFrame(data)
|
| 135 |
ds = Dataset.from_pandas(df)
|
|
@@ -138,11 +151,11 @@ def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
|
|
| 138 |
created_ds = DatasetDict({"train": Dataset.from_dict({"text": [conversation_data]})})
|
| 139 |
else:
|
| 140 |
# Fallback for plain text
|
| 141 |
-
|
| 142 |
created_ds = DatasetDict({"train": Dataset.from_dict({"text": [conversation_data]})})
|
| 143 |
|
| 144 |
# Push using the DatasetDict push_to_hub method.
|
| 145 |
-
|
| 146 |
created_ds.push_to_hub(
|
| 147 |
repo_id=repo_id,
|
| 148 |
token=api_key,
|
|
@@ -150,13 +163,13 @@ def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
|
|
| 150 |
)
|
| 151 |
|
| 152 |
dataset_url = f"https://huggingface.co/datasets/{repo_id}"
|
| 153 |
-
|
| 154 |
-
return f"Successfully created dataset at {dataset_url}"
|
| 155 |
except Exception as e:
|
| 156 |
import traceback
|
| 157 |
error_trace = traceback.format_exc()
|
| 158 |
-
|
| 159 |
-
return f"Error creating dataset: {str(e)}\n
|
| 160 |
|
| 161 |
@tool
|
| 162 |
def Dataset_Creator_Tool(dataset_name: str, conversation_data: str) -> str:
|
|
@@ -164,22 +177,23 @@ def Dataset_Creator_Tool(dataset_name: str, conversation_data: str) -> str:
|
|
| 164 |
|
| 165 |
Args:
|
| 166 |
dataset_name: Name for the dataset (will be prefixed with 'Misfits-and-Machines/')
|
| 167 |
-
conversation_data: Data content to save in the dataset.
|
| 168 |
-
1. JSON array of objects
|
| 169 |
Example: [{"name": "Product A", "brand": "Company X"}, {"name": "Product B", "brand": "Company Y"}]
|
| 170 |
-
2. Pipe-separated values
|
| 171 |
Example: "name | brand\nProduct A | Company X\nProduct B | Company Y"
|
| 172 |
-
3. Plain text
|
| 173 |
|
| 174 |
Returns:
|
| 175 |
-
|
| 176 |
"""
|
| 177 |
try:
|
| 178 |
-
|
| 179 |
-
|
|
|
|
| 180 |
result = Dataset_Creator_Function(dataset_name, conversation_data)
|
| 181 |
-
|
| 182 |
-
return
|
| 183 |
except Exception as e:
|
| 184 |
import traceback
|
| 185 |
error_trace = traceback.format_exc()
|
|
|
|
| 55 |
return f"Error using Sonar Websearch tool '{arg1} {arg2}': {str(e)}"
|
| 56 |
|
| 57 |
|
| 58 |
+
def parse_json(text: str):
|
| 59 |
+
"""
|
| 60 |
+
A minimal JSON parser workaround using eval.
|
| 61 |
+
Replaces 'true', 'false', and 'null' with their Python equivalents.
|
| 62 |
+
WARNING: Use only with trusted input.
|
| 63 |
+
"""
|
| 64 |
+
safe_text = text.replace("true", "True").replace("false", "False").replace("null", "None")
|
| 65 |
+
try:
|
| 66 |
+
return eval(safe_text, {"__builtins__": None}, {})
|
| 67 |
+
except Exception as e:
|
| 68 |
+
raise ValueError(f"Failed to parse JSON: {str(e)}")
|
| 69 |
+
|
| 70 |
def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
|
| 71 |
"""Creates and pushes a dataset to Hugging Face with the conversation history.
|
| 72 |
|
|
|
|
| 78 |
- Plain text (stored in a single 'text' column)
|
| 79 |
|
| 80 |
Returns:
|
| 81 |
+
URL of the created dataset or error message along with the log output.
|
| 82 |
"""
|
| 83 |
+
log_text = ""
|
| 84 |
try:
|
| 85 |
+
# Required imports (json replaced by our parse_json)
|
|
|
|
| 86 |
import pandas as pd
|
| 87 |
from datasets import Dataset, DatasetDict
|
| 88 |
from huggingface_hub import HfApi
|
|
|
|
| 97 |
safe_dataset_name = dataset_name.replace(" ", "_").lower()
|
| 98 |
repo_id = f"{username}/{safe_dataset_name}"
|
| 99 |
|
| 100 |
+
log_text += f"Creating dataset: {repo_id}\n"
|
| 101 |
|
| 102 |
# Ensure repository exists
|
| 103 |
hf_api = HfApi(token=api_key)
|
| 104 |
try:
|
| 105 |
if not hf_api.repo_exists(repo_id=repo_id, repo_type="dataset"):
|
| 106 |
hf_api.create_repo(repo_id=repo_id, repo_type="dataset")
|
| 107 |
+
log_text += f"Created repository: {repo_id}\n"
|
| 108 |
else:
|
| 109 |
+
log_text += f"Repository already exists: {repo_id}\n"
|
| 110 |
except Exception as e:
|
| 111 |
+
log_text += f"Note when checking/creating repository: {str(e)}\n"
|
| 112 |
|
| 113 |
# Process input data
|
| 114 |
created_ds = None
|
| 115 |
try:
|
| 116 |
+
# Use parse_json instead of json.loads
|
| 117 |
+
json_data = parse_json(conversation_data)
|
| 118 |
if isinstance(json_data, list) and all(isinstance(item, dict) for item in json_data):
|
| 119 |
+
log_text += f"Processing JSON array with {len(json_data)} items\n"
|
| 120 |
df = pd.DataFrame(json_data)
|
| 121 |
ds = Dataset.from_pandas(df)
|
| 122 |
created_ds = DatasetDict({"train": ds})
|
| 123 |
elif isinstance(json_data, dict):
|
| 124 |
+
log_text += "Processing single JSON object\n"
|
| 125 |
df = pd.DataFrame([json_data])
|
| 126 |
ds = Dataset.from_pandas(df)
|
| 127 |
created_ds = DatasetDict({"train": ds})
|
| 128 |
else:
|
| 129 |
raise ValueError("JSON not recognized as array or single object")
|
| 130 |
+
except Exception as e:
|
| 131 |
+
log_text += f"Not processing as JSON: {str(e)}\n"
|
| 132 |
# Try pipe-separated format
|
| 133 |
lines = conversation_data.strip().split('\n')
|
| 134 |
if '|' in conversation_data and len(lines) > 1:
|
| 135 |
+
log_text += "Processing as pipe-separated data\n"
|
| 136 |
headers = [h.strip() for h in lines[0].split('|')]
|
| 137 |
data = []
|
| 138 |
for i, line in enumerate(lines[1:], 1):
|
|
|
|
| 142 |
if len(values) == len(headers):
|
| 143 |
data.append(dict(zip(headers, values)))
|
| 144 |
else:
|
| 145 |
+
log_text += f"Warning: Skipping row {i} (column count mismatch)\n"
|
| 146 |
if data:
|
| 147 |
df = pd.DataFrame(data)
|
| 148 |
ds = Dataset.from_pandas(df)
|
|
|
|
| 151 |
created_ds = DatasetDict({"train": Dataset.from_dict({"text": [conversation_data]})})
|
| 152 |
else:
|
| 153 |
# Fallback for plain text
|
| 154 |
+
log_text += "Processing as plain text\n"
|
| 155 |
created_ds = DatasetDict({"train": Dataset.from_dict({"text": [conversation_data]})})
|
| 156 |
|
| 157 |
# Push using the DatasetDict push_to_hub method.
|
| 158 |
+
log_text += f"Pushing dataset to {repo_id}\n"
|
| 159 |
created_ds.push_to_hub(
|
| 160 |
repo_id=repo_id,
|
| 161 |
token=api_key,
|
|
|
|
| 163 |
)
|
| 164 |
|
| 165 |
dataset_url = f"https://huggingface.co/datasets/{repo_id}"
|
| 166 |
+
log_text += f"Dataset successfully pushed to: {dataset_url}\n"
|
| 167 |
+
return f"Successfully created dataset at {dataset_url}\nLogs:\n{log_text}"
|
| 168 |
except Exception as e:
|
| 169 |
import traceback
|
| 170 |
error_trace = traceback.format_exc()
|
| 171 |
+
log_text += f"Dataset creation error: {str(e)}\n{error_trace}\n"
|
| 172 |
+
return f"Error creating dataset: {str(e)}\nLogs:\n{log_text}"
|
| 173 |
|
| 174 |
@tool
|
| 175 |
def Dataset_Creator_Tool(dataset_name: str, conversation_data: str) -> str:
|
|
|
|
| 177 |
|
| 178 |
Args:
|
| 179 |
dataset_name: Name for the dataset (will be prefixed with 'Misfits-and-Machines/')
|
| 180 |
+
conversation_data: Data content to save in the dataset. Formats supported:
|
| 181 |
+
1. JSON array of objects – Each object becomes a row (keys as columns).
|
| 182 |
Example: [{"name": "Product A", "brand": "Company X"}, {"name": "Product B", "brand": "Company Y"}]
|
| 183 |
+
2. Pipe-separated values – First row as headers, remaining rows as values.
|
| 184 |
Example: "name | brand\nProduct A | Company X\nProduct B | Company Y"
|
| 185 |
+
3. Plain text – Stored in a single 'text' column.
|
| 186 |
|
| 187 |
Returns:
|
| 188 |
+
A link to the created dataset on the Hugging Face Hub or an error message, along with log details.
|
| 189 |
"""
|
| 190 |
try:
|
| 191 |
+
log_text = ""
|
| 192 |
+
log_text += f"Creating dataset '{dataset_name}' with {len(conversation_data)} characters of data\n"
|
| 193 |
+
log_text += f"Dataset will be created at Misfits-and-Machines/{dataset_name.replace(' ', '_').lower()}\n"
|
| 194 |
result = Dataset_Creator_Function(dataset_name, conversation_data)
|
| 195 |
+
log_text += f"Dataset creation result: {result}\n"
|
| 196 |
+
return log_text
|
| 197 |
except Exception as e:
|
| 198 |
import traceback
|
| 199 |
error_trace = traceback.format_exc()
|