Agent-Example

Runtime error

App Files Files Community

SolshineMisfit committed on Mar 6

Commit

0be1b82

verified ·

1 Parent(s): 5282c5b

The agent can now use this tool with clearer feedback on what is happening, and users are directed to the URL where they can verify that the dataset has appeared on Hugging Face.

Browse files

1. Uses underscores instead of dashes in dataset names for better compatibility
2. Creates a proper dataset structure with a "train" split (standard practice for HF datasets)
3. Sets split="train" in push_to_hub to ensure proper organization
4. Provides more verbose logging throughout the process
5. Returns a clear URL for the user to check their dataset
6. Offers troubleshooting steps when errors occur
7. Properly handles repository existence checks
8. Uses a more descriptive commit message

Files changed (1) hide show

app.py +45 -23

app.py CHANGED Viewed

@@ -65,7 +65,7 @@ def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
         # Get API key from environment variables
         api_key = os.getenv("HF_API_KEY") or os.getenv("HUGGINGFACE_API_KEY", "")
         if not api_key:
-            return "Error: No Hugging Face API key found in environment variables"
         # Force the username to be the known value
         username = "Misfits-and-Machines"
@@ -73,61 +73,83 @@ def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
         # Initialize Hugging Face API
         hf_api = HfApi(token=api_key)
-        # Sanitize dataset name and create repo_id
-        safe_dataset_name = dataset_name.replace(" ", "-").lower()
         repo_id = f"{username}/{safe_dataset_name}"
         print(f"Creating dataset repository: {repo_id}")
-        # Create the repository explicitly
         try:
-            hf_api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True)
-            print(f"Repository {repo_id} created or confirmed")
         except Exception as repo_error:
-            print(f"Repository creation note: {str(repo_error)}")
-        # Build conversation object
-        conversation = {
-            "text": conversation_data,
-            "timestamp": datetime.datetime.now().isoformat(),
-            "dataset_id": str(uuid.uuid4())
-        }
-        # Create a Hugging Face dataset
-        dataset = Dataset.from_dict({"conversations": [conversation]})
-        # Push dataset to the Hub with minimal parameters
         dataset.push_to_hub(
             repo_id=repo_id,
-            token=api_key
         )
-        dataset_url = f"https://huggingface.co/datasets/{repo_id}"
         print(f"Dataset successfully pushed to: {dataset_url}")
-        return f"Successfully created dataset: {dataset_url}"
     except Exception as e:
         import traceback
         error_trace = traceback.format_exc()
         print(f"Dataset creation error: {str(e)}\n{error_trace}")
-        return f"Error creating dataset: {str(e)}"
 @tool
 def Dataset_Creator_Tool(dataset_name: str, conversation_data: str) -> str:
     """A tool that posts a new dataset of the current conversation to Hugging Face.
     Args:
-        dataset_name: Name for the dataset (will be prefixed with username)
         conversation_data: String content to save to the dataset (no JSON conversion needed)
     """
     try:
         print(f"Creating dataset '{dataset_name}' with {len(conversation_data)} characters of data")
         result = Dataset_Creator_Function(dataset_name, conversation_data)
-        print(f"Result: {result}")
         return result
     except Exception as e:
         import traceback
         error_trace = traceback.format_exc()
-        return f"Error using Dataset Creator tool: {str(e)}\n{error_trace}"
 @tool

         # Get API key from environment variables
         api_key = os.getenv("HF_API_KEY") or os.getenv("HUGGINGFACE_API_KEY", "")
         if not api_key:
+            return "Error: No Hugging Face API key found in environment variables. Please set HF_API_KEY or HUGGINGFACE_API_KEY."
         # Force the username to be the known value
         username = "Misfits-and-Machines"
         # Initialize Hugging Face API
         hf_api = HfApi(token=api_key)
+        # Sanitize dataset name - use underscores instead of dashes for better compatibility
+        safe_dataset_name = dataset_name.replace(" ", "_").lower()
         repo_id = f"{username}/{safe_dataset_name}"
         print(f"Creating dataset repository: {repo_id}")
+        # Prepare dataset with appropriate structure
+        # First, ensure we have a proper train split with necessary fields
+        dataset_dict = {
+            "text": [conversation_data],
+            "timestamp": [datetime.datetime.now().isoformat()],
+            "dataset_id": [str(uuid.uuid4())]
+        }
+        # Create a Hugging Face dataset
+        dataset = Dataset.from_dict(dataset_dict)
+        # Standard practice is to have a train split for datasets
+        dataset_dict = {"train": dataset}
+        # Check if the repository already exists
         try:
+            repo_exists = hf_api.repo_exists(repo_id=repo_id, repo_type="dataset")
+            if repo_exists:
+                print(f"Repository {repo_id} already exists")
+            else:
+                # Create repo if it doesn't exist
+                hf_api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True)
+                print(f"Repository {repo_id} created successfully")
         except Exception as repo_error:
+            print(f"Repository check/creation error: {str(repo_error)}")
+            # Continue anyway as push_to_hub might create the repo
+        # Push dataset to the Hub with appropriate parameters
+        print(f"Pushing dataset to {repo_id}")
+        # Create URL for monitoring - we'll show this to the user so they can check progress
+        dataset_url = f"https://huggingface.co/datasets/{repo_id}"
+        print(f"Dataset URL will be: {dataset_url}")
+        # Push with careful parameter selection
         dataset.push_to_hub(
             repo_id=repo_id,
+            token=api_key,
+            split="train",  # Use a proper split name
+            commit_message=f"Upload dataset: {dataset_name}"
         )
         print(f"Dataset successfully pushed to: {dataset_url}")
+        return f"Successfully created dataset at {dataset_url} - please check this URL to verify your dataset is visible"
     except Exception as e:
         import traceback
         error_trace = traceback.format_exc()
         print(f"Dataset creation error: {str(e)}\n{error_trace}")
+        return f"Error creating dataset: {str(e)}\n\nTo troubleshoot:\n1. Verify API key is valid\n2. Try with a different dataset name\n3. Check if you have write permissions for the Misfits-and-Machines organization"
 @tool
 def Dataset_Creator_Tool(dataset_name: str, conversation_data: str) -> str:
     """A tool that posts a new dataset of the current conversation to Hugging Face.
     Args:
+        dataset_name: Name for the dataset (will be prefixed with 'Misfits-and-Machines/')
         conversation_data: String content to save to the dataset (no JSON conversion needed)
+    Returns:
+        Link to the created dataset or error message with troubleshooting steps
     """
     try:
         print(f"Creating dataset '{dataset_name}' with {len(conversation_data)} characters of data")
+        print(f"Dataset will be created at Misfits-and-Machines/{dataset_name.replace(' ', '_').lower()}")
         result = Dataset_Creator_Function(dataset_name, conversation_data)
+        print(f"Dataset creation result: {result}")
         return result
     except Exception as e:
         import traceback
         error_trace = traceback.format_exc()
+        return f"Error using Dataset Creator tool: {str(e)}\n{error_trace}\n\nPlease try with a simpler dataset name using only letters, numbers and underscores."
 @tool