Spaces:
Runtime error
Runtime error
Had to use eval for JSON parsing because the json module is forbidden in this env
Browse files
app.py
CHANGED
|
@@ -55,6 +55,18 @@ def Sonar_Web_Search_Tool(arg1: str, arg2: str) -> str:
|
|
| 55 |
return f"Error using Sonar Websearch tool '{arg1} {arg2}': {str(e)}"
|
| 56 |
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
|
| 59 |
"""Creates and pushes a dataset to Hugging Face with the conversation history.
|
| 60 |
|
|
@@ -66,11 +78,11 @@ def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
|
|
| 66 |
- Plain text (stored in a single 'text' column)
|
| 67 |
|
| 68 |
Returns:
|
| 69 |
-
URL of the created dataset or error message
|
| 70 |
"""
|
|
|
|
| 71 |
try:
|
| 72 |
-
# Required imports
|
| 73 |
-
import json
|
| 74 |
import pandas as pd
|
| 75 |
from datasets import Dataset, DatasetDict
|
| 76 |
from huggingface_hub import HfApi
|
|
@@ -85,41 +97,42 @@ def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
|
|
| 85 |
safe_dataset_name = dataset_name.replace(" ", "_").lower()
|
| 86 |
repo_id = f"{username}/{safe_dataset_name}"
|
| 87 |
|
| 88 |
-
|
| 89 |
|
| 90 |
# Ensure repository exists
|
| 91 |
hf_api = HfApi(token=api_key)
|
| 92 |
try:
|
| 93 |
if not hf_api.repo_exists(repo_id=repo_id, repo_type="dataset"):
|
| 94 |
hf_api.create_repo(repo_id=repo_id, repo_type="dataset")
|
| 95 |
-
|
| 96 |
else:
|
| 97 |
-
|
| 98 |
except Exception as e:
|
| 99 |
-
|
| 100 |
|
| 101 |
# Process input data
|
| 102 |
created_ds = None
|
| 103 |
try:
|
| 104 |
-
|
|
|
|
| 105 |
if isinstance(json_data, list) and all(isinstance(item, dict) for item in json_data):
|
| 106 |
-
|
| 107 |
df = pd.DataFrame(json_data)
|
| 108 |
ds = Dataset.from_pandas(df)
|
| 109 |
created_ds = DatasetDict({"train": ds})
|
| 110 |
elif isinstance(json_data, dict):
|
| 111 |
-
|
| 112 |
df = pd.DataFrame([json_data])
|
| 113 |
ds = Dataset.from_pandas(df)
|
| 114 |
created_ds = DatasetDict({"train": ds})
|
| 115 |
else:
|
| 116 |
raise ValueError("JSON not recognized as array or single object")
|
| 117 |
-
except
|
| 118 |
-
|
| 119 |
# Try pipe-separated format
|
| 120 |
lines = conversation_data.strip().split('\n')
|
| 121 |
if '|' in conversation_data and len(lines) > 1:
|
| 122 |
-
|
| 123 |
headers = [h.strip() for h in lines[0].split('|')]
|
| 124 |
data = []
|
| 125 |
for i, line in enumerate(lines[1:], 1):
|
|
@@ -129,7 +142,7 @@ def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
|
|
| 129 |
if len(values) == len(headers):
|
| 130 |
data.append(dict(zip(headers, values)))
|
| 131 |
else:
|
| 132 |
-
|
| 133 |
if data:
|
| 134 |
df = pd.DataFrame(data)
|
| 135 |
ds = Dataset.from_pandas(df)
|
|
@@ -138,11 +151,11 @@ def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
|
|
| 138 |
created_ds = DatasetDict({"train": Dataset.from_dict({"text": [conversation_data]})})
|
| 139 |
else:
|
| 140 |
# Fallback for plain text
|
| 141 |
-
|
| 142 |
created_ds = DatasetDict({"train": Dataset.from_dict({"text": [conversation_data]})})
|
| 143 |
|
| 144 |
# Push using the DatasetDict push_to_hub method.
|
| 145 |
-
|
| 146 |
created_ds.push_to_hub(
|
| 147 |
repo_id=repo_id,
|
| 148 |
token=api_key,
|
|
@@ -150,13 +163,13 @@ def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
|
|
| 150 |
)
|
| 151 |
|
| 152 |
dataset_url = f"https://huggingface.co/datasets/{repo_id}"
|
| 153 |
-
|
| 154 |
-
return f"Successfully created dataset at {dataset_url}"
|
| 155 |
except Exception as e:
|
| 156 |
import traceback
|
| 157 |
error_trace = traceback.format_exc()
|
| 158 |
-
|
| 159 |
-
return f"Error creating dataset: {str(e)}\n
|
| 160 |
|
| 161 |
@tool
|
| 162 |
def Dataset_Creator_Tool(dataset_name: str, conversation_data: str) -> str:
|
|
@@ -164,22 +177,23 @@ def Dataset_Creator_Tool(dataset_name: str, conversation_data: str) -> str:
|
|
| 164 |
|
| 165 |
Args:
|
| 166 |
dataset_name: Name for the dataset (will be prefixed with 'Misfits-and-Machines/')
|
| 167 |
-
conversation_data: Data content to save in the dataset.
|
| 168 |
-
1. JSON array of objects
|
| 169 |
Example: [{"name": "Product A", "brand": "Company X"}, {"name": "Product B", "brand": "Company Y"}]
|
| 170 |
-
2. Pipe-separated values
|
| 171 |
Example: "name | brand\nProduct A | Company X\nProduct B | Company Y"
|
| 172 |
-
3. Plain text
|
| 173 |
|
| 174 |
Returns:
|
| 175 |
-
|
| 176 |
"""
|
| 177 |
try:
|
| 178 |
-
|
| 179 |
-
|
|
|
|
| 180 |
result = Dataset_Creator_Function(dataset_name, conversation_data)
|
| 181 |
-
|
| 182 |
-
return
|
| 183 |
except Exception as e:
|
| 184 |
import traceback
|
| 185 |
error_trace = traceback.format_exc()
|
|
|
|
| 55 |
return f"Error using Sonar Websearch tool '{arg1} {arg2}': {str(e)}"
|
| 56 |
|
| 57 |
|
| 58 |
+
def parse_json(text: str):
|
| 59 |
+
"""
|
| 60 |
+
A minimal JSON parser workaround using eval.
|
| 61 |
+
Replaces 'true', 'false', and 'null' with their Python equivalents.
|
| 62 |
+
WARNING: Use only with trusted input.
|
| 63 |
+
"""
|
| 64 |
+
safe_text = text.replace("true", "True").replace("false", "False").replace("null", "None")
|
| 65 |
+
try:
|
| 66 |
+
return eval(safe_text, {"__builtins__": None}, {})
|
| 67 |
+
except Exception as e:
|
| 68 |
+
raise ValueError(f"Failed to parse JSON: {str(e)}")
|
| 69 |
+
|
| 70 |
def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
|
| 71 |
"""Creates and pushes a dataset to Hugging Face with the conversation history.
|
| 72 |
|
|
|
|
| 78 |
- Plain text (stored in a single 'text' column)
|
| 79 |
|
| 80 |
Returns:
|
| 81 |
+
URL of the created dataset or error message along with the log output.
|
| 82 |
"""
|
| 83 |
+
log_text = ""
|
| 84 |
try:
|
| 85 |
+
# Required imports (json replaced by our parse_json)
|
|
|
|
| 86 |
import pandas as pd
|
| 87 |
from datasets import Dataset, DatasetDict
|
| 88 |
from huggingface_hub import HfApi
|
|
|
|
| 97 |
safe_dataset_name = dataset_name.replace(" ", "_").lower()
|
| 98 |
repo_id = f"{username}/{safe_dataset_name}"
|
| 99 |
|
| 100 |
+
log_text += f"Creating dataset: {repo_id}\n"
|
| 101 |
|
| 102 |
# Ensure repository exists
|
| 103 |
hf_api = HfApi(token=api_key)
|
| 104 |
try:
|
| 105 |
if not hf_api.repo_exists(repo_id=repo_id, repo_type="dataset"):
|
| 106 |
hf_api.create_repo(repo_id=repo_id, repo_type="dataset")
|
| 107 |
+
log_text += f"Created repository: {repo_id}\n"
|
| 108 |
else:
|
| 109 |
+
log_text += f"Repository already exists: {repo_id}\n"
|
| 110 |
except Exception as e:
|
| 111 |
+
log_text += f"Note when checking/creating repository: {str(e)}\n"
|
| 112 |
|
| 113 |
# Process input data
|
| 114 |
created_ds = None
|
| 115 |
try:
|
| 116 |
+
# Use parse_json instead of json.loads
|
| 117 |
+
json_data = parse_json(conversation_data)
|
| 118 |
if isinstance(json_data, list) and all(isinstance(item, dict) for item in json_data):
|
| 119 |
+
log_text += f"Processing JSON array with {len(json_data)} items\n"
|
| 120 |
df = pd.DataFrame(json_data)
|
| 121 |
ds = Dataset.from_pandas(df)
|
| 122 |
created_ds = DatasetDict({"train": ds})
|
| 123 |
elif isinstance(json_data, dict):
|
| 124 |
+
log_text += "Processing single JSON object\n"
|
| 125 |
df = pd.DataFrame([json_data])
|
| 126 |
ds = Dataset.from_pandas(df)
|
| 127 |
created_ds = DatasetDict({"train": ds})
|
| 128 |
else:
|
| 129 |
raise ValueError("JSON not recognized as array or single object")
|
| 130 |
+
except Exception as e:
|
| 131 |
+
log_text += f"Not processing as JSON: {str(e)}\n"
|
| 132 |
# Try pipe-separated format
|
| 133 |
lines = conversation_data.strip().split('\n')
|
| 134 |
if '|' in conversation_data and len(lines) > 1:
|
| 135 |
+
log_text += "Processing as pipe-separated data\n"
|
| 136 |
headers = [h.strip() for h in lines[0].split('|')]
|
| 137 |
data = []
|
| 138 |
for i, line in enumerate(lines[1:], 1):
|
|
|
|
| 142 |
if len(values) == len(headers):
|
| 143 |
data.append(dict(zip(headers, values)))
|
| 144 |
else:
|
| 145 |
+
log_text += f"Warning: Skipping row {i} (column count mismatch)\n"
|
| 146 |
if data:
|
| 147 |
df = pd.DataFrame(data)
|
| 148 |
ds = Dataset.from_pandas(df)
|
|
|
|
| 151 |
created_ds = DatasetDict({"train": Dataset.from_dict({"text": [conversation_data]})})
|
| 152 |
else:
|
| 153 |
# Fallback for plain text
|
| 154 |
+
log_text += "Processing as plain text\n"
|
| 155 |
created_ds = DatasetDict({"train": Dataset.from_dict({"text": [conversation_data]})})
|
| 156 |
|
| 157 |
# Push using the DatasetDict push_to_hub method.
|
| 158 |
+
log_text += f"Pushing dataset to {repo_id}\n"
|
| 159 |
created_ds.push_to_hub(
|
| 160 |
repo_id=repo_id,
|
| 161 |
token=api_key,
|
|
|
|
| 163 |
)
|
| 164 |
|
| 165 |
dataset_url = f"https://huggingface.co/datasets/{repo_id}"
|
| 166 |
+
log_text += f"Dataset successfully pushed to: {dataset_url}\n"
|
| 167 |
+
return f"Successfully created dataset at {dataset_url}\nLogs:\n{log_text}"
|
| 168 |
except Exception as e:
|
| 169 |
import traceback
|
| 170 |
error_trace = traceback.format_exc()
|
| 171 |
+
log_text += f"Dataset creation error: {str(e)}\n{error_trace}\n"
|
| 172 |
+
return f"Error creating dataset: {str(e)}\nLogs:\n{log_text}"
|
| 173 |
|
| 174 |
@tool
|
| 175 |
def Dataset_Creator_Tool(dataset_name: str, conversation_data: str) -> str:
|
|
|
|
| 177 |
|
| 178 |
Args:
|
| 179 |
dataset_name: Name for the dataset (will be prefixed with 'Misfits-and-Machines/')
|
| 180 |
+
conversation_data: Data content to save in the dataset. Formats supported:
|
| 181 |
+
1. JSON array of objects – Each object becomes a row (keys as columns).
|
| 182 |
Example: [{"name": "Product A", "brand": "Company X"}, {"name": "Product B", "brand": "Company Y"}]
|
| 183 |
+
2. Pipe-separated values – First row as headers, remaining rows as values.
|
| 184 |
Example: "name | brand\nProduct A | Company X\nProduct B | Company Y"
|
| 185 |
+
3. Plain text – Stored in a single 'text' column.
|
| 186 |
|
| 187 |
Returns:
|
| 188 |
+
A link to the created dataset on the Hugging Face Hub or an error message, along with log details.
|
| 189 |
"""
|
| 190 |
try:
|
| 191 |
+
log_text = ""
|
| 192 |
+
log_text += f"Creating dataset '{dataset_name}' with {len(conversation_data)} characters of data\n"
|
| 193 |
+
log_text += f"Dataset will be created at Misfits-and-Machines/{dataset_name.replace(' ', '_').lower()}\n"
|
| 194 |
result = Dataset_Creator_Function(dataset_name, conversation_data)
|
| 195 |
+
log_text += f"Dataset creation result: {result}\n"
|
| 196 |
+
return log_text
|
| 197 |
except Exception as e:
|
| 198 |
import traceback
|
| 199 |
error_trace = traceback.format_exc()
|