Agent-Example

Runtime error

App Files Community

SolshineMisfit committed on Mar 6

Commit

13b9d4a

verified ·

1 Parent(s): 3f1cc0a

Reverted to docs way for dataset upload

Browse files

Files changed (1) hide show

app.py +35 -69

app.py CHANGED Viewed

@@ -51,27 +51,24 @@ def Sonar_Web_Search_Tool(arg1: str, arg2: str) -> str:
         return f"Error using Sonar Websearch tool '{arg1} {arg2}': {str(e)}"
-def Dataset_Creator_Function(dataset_name: str, username: str, conversation_data: str) -> str:
     """Creates and pushes a dataset to Hugging Face with the conversation history.
     Args:
-        dataset_name: Name for the dataset
-        username: Default is "Misfits-and-Machines"
         conversation_data: String representing the conversation data
     Returns:
         URL of the created dataset or error message
     """
     try:
-        import tempfile
-        import pathlib
-        from datasets import Dataset, DatasetDict
-        import pandas as pd
         # Get API key from environment variables
-        api_key = os.getenv("HF_API_KEY")
         if not api_key:
-            return "Error: No Hugging Face API key found in environment variables. Please set HF_API_KEY or HUGGINGFACE_API_KEY."
         # Initialize Hugging Face API
         hf_api = HfApi(token=api_key)
@@ -80,74 +77,43 @@ def Dataset_Creator_Function(dataset_name: str, username: str, conversation_data
         safe_dataset_name = dataset_name.replace(" ", "_").lower()
         repo_id = f"{username}/{safe_dataset_name}"
-        print(f"Creating dataset repository: {repo_id}")
-        # Create a temporary directory to store the dataset files
-        with tempfile.TemporaryDirectory() as tmp_dir:
-            # Convert data to DataFrame and save as CSV
-            df = pd.DataFrame({
-                "text": [conversation_data],
-                "timestamp": [datetime.datetime.now().isoformat()],
-                "dataset_id": [str(uuid.uuid4())]
-            })
-            # Save CSV in the temp directory
-            csv_path = pathlib.Path(tmp_dir) / "train.csv"
-            df.to_csv(csv_path, index=False)
-            print(f"Data saved to temporary CSV file: {csv_path}")
-            # Load from CSV to ensure proper dataset structure
-            train_dataset = Dataset.from_pandas(df)
-            # Create a DatasetDict with a train split
-            dataset_dict = DatasetDict({"train": train_dataset})
-            print(f"Created dataset with {len(train_dataset)} rows")
-            # Create the repository explicitly if it doesn't exist
-            try:
-                if not hf_api.repo_exists(repo_id=repo_id, repo_type="dataset"):
-                    hf_api.create_repo(repo_id=repo_id, repo_type="dataset")
-                    print(f"Repository {repo_id} created")
-                else:
-                    print(f"Repository {repo_id} already exists")
-            except Exception as repo_error:
-                print(f"Repository creation error: {str(repo_error)}")
-            # Push to Hugging Face Hub
-            print(f"Pushing dataset to {repo_id}")
-            # Use the DatasetDict push_to_hub method
-            dataset_dict.push_to_hub(
-                repo_id=repo_id,
-                token=api_key,
-                private=False
-            )
-            dataset_url = f"https://huggingface.co/datasets/{repo_id}"
-            print(f"Dataset successfully pushed to: {dataset_url}")
-            # Double-check that the repo exists
-            if hf_api.repo_exists(repo_id=repo_id, repo_type="dataset"):
-                print(f"Verified: Repository {repo_id} exists")
-            else:
-                print(f"Warning: Repository {repo_id} not found after push")
-            return f"Successfully created dataset at {dataset_url}"
     except Exception as e:
         import traceback
         error_trace = traceback.format_exc()
         print(f"Dataset creation error: {str(e)}\n{error_trace}")
-        return f"Error creating dataset: {str(e)}\n\nTo troubleshoot:\n1. Verify API key is valid\n2. Try with a different dataset name"
 @tool
-def Dataset_Creator_Tool(dataset_name: str, username: str, conversation_data: str) -> str:
     """A tool that posts a new dataset of the current conversation to Hugging Face.
     Args:
-        dataset_name: Name for the dataset
-        username: (Default should be 'Misfits-and-Machines/')
-        conversation_data: String content to save to the dataset (no JSON conversion needed)
     Returns:
         Link to the created dataset or error message with troubleshooting steps
@@ -155,7 +121,7 @@ def Dataset_Creator_Tool(dataset_name: str, username: str, conversation_data: st
     try:
         print(f"Creating dataset '{dataset_name}' with {len(conversation_data)} characters of data")
         print(f"Dataset will be created at Misfits-and-Machines/{dataset_name.replace(' ', '_').lower()}")
-        result = Dataset_Creator_Function(dataset_name, username, conversation_data)
         print(f"Dataset creation result: {result}")
         return result
     except Exception as e:

         return f"Error using Sonar Websearch tool '{arg1} {arg2}': {str(e)}"
+def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
     """Creates and pushes a dataset to Hugging Face with the conversation history.
     Args:
+        dataset_name: Name for the dataset (will be prefixed with username)
         conversation_data: String representing the conversation data
     Returns:
         URL of the created dataset or error message
     """
     try:
         # Get API key from environment variables
+        api_key = os.getenv("HF_API_KEY") or os.getenv("HUGGINGFACE_API_KEY")
         if not api_key:
+            return "Error: No Hugging Face API key found in environment variables"
+        # Set fixed username for dataset organization
+        username = "Misfits-and-Machines"
         # Initialize Hugging Face API
         hf_api = HfApi(token=api_key)
         safe_dataset_name = dataset_name.replace(" ", "_").lower()
         repo_id = f"{username}/{safe_dataset_name}"
+        print(f"Creating dataset: {repo_id}")
+        # Create a simple dataset from a dictionary
+        data = {
+            "text": [conversation_data],
+            "timestamp": [datetime.datetime.now().isoformat()],
+            "id": [str(uuid.uuid4())]
+        }
+        # Create the dataset directly
+        dataset = Dataset.from_dict(data)
+        # Push to Hugging Face Hub using the simpler method from documentation
+        dataset.push_to_hub(
+            repo_id=repo_id,  # Include username in repo_id
+            token=api_key,    # Pass token explicitly
+            private=False     # Make it public
+        )
+        # Generate the URL for the dataset
+        dataset_url = f"https://huggingface.co/datasets/{repo_id}"
+        print(f"Dataset successfully pushed to: {dataset_url}")
+        return f"Successfully created dataset at {dataset_url}"
     except Exception as e:
         import traceback
         error_trace = traceback.format_exc()
         print(f"Dataset creation error: {str(e)}\n{error_trace}")
+        return f"Error creating dataset: {str(e)}\n\nTroubleshooting tips:\n1. Verify your HF_API_KEY is valid\n2. Try a simpler dataset name with only letters and underscores\n3. Check your permissions for the Misfits-and-Machines organization"
 @tool
+def Dataset_Creator_Tool(dataset_name: str, conversation_data: str) -> str:
     """A tool that posts a new dataset of the current conversation to Hugging Face.
     Args:
+        dataset_name: Name for the dataset (will be prefixed with 'Misfits-and-Machines/')
+        conversation_data: String content to save to the dataset
     Returns:
         Link to the created dataset or error message with troubleshooting steps
     try:
         print(f"Creating dataset '{dataset_name}' with {len(conversation_data)} characters of data")
         print(f"Dataset will be created at Misfits-and-Machines/{dataset_name.replace(' ', '_').lower()}")
+        result = Dataset_Creator_Function(dataset_name, conversation_data)
         print(f"Dataset creation result: {result}")
         return result
     except Exception as e: