Agent-Course-First_agent

Runtime error

App Files Files Community

SolshineMisfit committed on Mar 6, 2025

Commit

b3b6478

verified ·

1 Parent(s): 0be1b82

Put more under the control of the agent

Browse files

Files changed (1) hide show

app.py +63 -52

app.py CHANGED Viewed

@@ -51,83 +51,94 @@ def Sonar_Web_Search_Tool(arg1: str, arg2: str) -> str:
         return f"Error using Sonar Websearch tool '{arg1} {arg2}': {str(e)}"
-def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
     """Creates and pushes a dataset to Hugging Face with the conversation history.
     Args:
-        dataset_name: Name for the dataset (will be prefixed with username)
         conversation_data: String representing the conversation data
     Returns:
         URL of the created dataset or error message
     """
     try:
         # Get API key from environment variables
-        api_key = os.getenv("HF_API_KEY") or os.getenv("HUGGINGFACE_API_KEY", "")
         if not api_key:
             return "Error: No Hugging Face API key found in environment variables. Please set HF_API_KEY or HUGGINGFACE_API_KEY."
-        # Force the username to be the known value
-        username = "Misfits-and-Machines"
         # Initialize Hugging Face API
         hf_api = HfApi(token=api_key)
-        # Sanitize dataset name - use underscores instead of dashes for better compatibility
         safe_dataset_name = dataset_name.replace(" ", "_").lower()
         repo_id = f"{username}/{safe_dataset_name}"
         print(f"Creating dataset repository: {repo_id}")
-        # Prepare dataset with appropriate structure
-        # First, ensure we have a proper train split with necessary fields
-        dataset_dict = {
-            "text": [conversation_data],
-            "timestamp": [datetime.datetime.now().isoformat()],
-            "dataset_id": [str(uuid.uuid4())]
-        }
-        # Create a Hugging Face dataset
-        dataset = Dataset.from_dict(dataset_dict)
-        # Standard practice is to have a train split for datasets
-        dataset_dict = {"train": dataset}
-        # Check if the repository already exists
-        try:
-            repo_exists = hf_api.repo_exists(repo_id=repo_id, repo_type="dataset")
-            if repo_exists:
-                print(f"Repository {repo_id} already exists")
             else:
-                # Create repo if it doesn't exist
-                hf_api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True)
-                print(f"Repository {repo_id} created successfully")
-        except Exception as repo_error:
-            print(f"Repository check/creation error: {str(repo_error)}")
-            # Continue anyway as push_to_hub might create the repo
-        # Push dataset to the Hub with appropriate parameters
-        print(f"Pushing dataset to {repo_id}")
-        # Create URL for monitoring - we'll show this to the user so they can check progress
-        dataset_url = f"https://huggingface.co/datasets/{repo_id}"
-        print(f"Dataset URL will be: {dataset_url}")
-        # Push with careful parameter selection
-        dataset.push_to_hub(
-            repo_id=repo_id,
-            token=api_key,
-            split="train",  # Use a proper split name
-            commit_message=f"Upload dataset: {dataset_name}"
-        )
-        print(f"Dataset successfully pushed to: {dataset_url}")
-        return f"Successfully created dataset at {dataset_url} - please check this URL to verify your dataset is visible"
     except Exception as e:
         import traceback
         error_trace = traceback.format_exc()
         print(f"Dataset creation error: {str(e)}\n{error_trace}")
-        return f"Error creating dataset: {str(e)}\n\nTo troubleshoot:\n1. Verify API key is valid\n2. Try with a different dataset name\n3. Check if you have write permissions for the Misfits-and-Machines organization"
 @tool
 def Dataset_Creator_Tool(dataset_name: str, conversation_data: str) -> str:
@@ -149,7 +160,7 @@ def Dataset_Creator_Tool(dataset_name: str, conversation_data: str) -> str:
     except Exception as e:
         import traceback
         error_trace = traceback.format_exc()
-        return f"Error using Dataset Creator tool: {str(e)}\n{error_trace}\n\nPlease try with a simpler dataset name using only letters, numbers and underscores."
 @tool

         return f"Error using Sonar Websearch tool '{arg1} {arg2}': {str(e)}"
+def Dataset_Creator_Function(dataset_name: str, username: str, conversation_data: str) -> str:
     """Creates and pushes a dataset to Hugging Face with the conversation history.
     Args:
+        dataset_name: Name for the dataset
+        username: Default is "Misfits-and-Machines"
         conversation_data: String representing the conversation data
     Returns:
         URL of the created dataset or error message
     """
     try:
+        import tempfile
+        import pathlib
+        from datasets import Dataset, DatasetDict
+        import pandas as pd
         # Get API key from environment variables
+        api_key = os.getenv("HF_API_KEY")
         if not api_key:
             return "Error: No Hugging Face API key found in environment variables. Please set HF_API_KEY or HUGGINGFACE_API_KEY."
         # Initialize Hugging Face API
         hf_api = HfApi(token=api_key)
+        # Sanitize dataset name
         safe_dataset_name = dataset_name.replace(" ", "_").lower()
         repo_id = f"{username}/{safe_dataset_name}"
         print(f"Creating dataset repository: {repo_id}")
+        # Create a temporary directory to store the dataset files
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            # Convert data to DataFrame and save as CSV
+            df = pd.DataFrame({
+                "text": [conversation_data],
+                "timestamp": [datetime.datetime.now().isoformat()],
+                "dataset_id": [str(uuid.uuid4())]
+            })
+            # Save CSV in the temp directory
+            csv_path = pathlib.Path(tmp_dir) / "train.csv"
+            df.to_csv(csv_path, index=False)
+            print(f"Data saved to temporary CSV file: {csv_path}")
+            # Load from CSV to ensure proper dataset structure
+            train_dataset = Dataset.from_pandas(df)
+            # Create a DatasetDict with a train split
+            dataset_dict = DatasetDict({"train": train_dataset})
+            print(f"Created dataset with {len(train_dataset)} rows")
+            # Create the repository explicitly if it doesn't exist
+            try:
+                if not hf_api.repo_exists(repo_id=repo_id, repo_type="dataset"):
+                    hf_api.create_repo(repo_id=repo_id, repo_type="dataset")
+                    print(f"Repository {repo_id} created")
+                else:
+                    print(f"Repository {repo_id} already exists")
+            except Exception as repo_error:
+                print(f"Repository creation error: {str(repo_error)}")
+            # Push to Hugging Face Hub
+            print(f"Pushing dataset to {repo_id}")
+            # Use the DatasetDict push_to_hub method
+            dataset_dict.push_to_hub(
+                repo_id=repo_id,
+                token=api_key,
+                private=False
+            )
+            dataset_url = f"https://huggingface.co/datasets/{repo_id}"
+            print(f"Dataset successfully pushed to: {dataset_url}")
+            # Double-check that the repo exists
+            if hf_api.repo_exists(repo_id=repo_id, repo_type="dataset"):
+                print(f"Verified: Repository {repo_id} exists")
             else:
+                print(f"Warning: Repository {repo_id} not found after push")
+            return f"Successfully created dataset at {dataset_url}"
     except Exception as e:
         import traceback
         error_trace = traceback.format_exc()
         print(f"Dataset creation error: {str(e)}\n{error_trace}")
+        return f"Error creating dataset: {str(e)}\n\nTo troubleshoot:\n1. Verify API key is valid\n2. Try with a different dataset name"
 @tool
 def Dataset_Creator_Tool(dataset_name: str, conversation_data: str) -> str:
     except Exception as e:
         import traceback
         error_trace = traceback.format_exc()
+        return f"Error using Dataset Creator tool: {str(e)}\n{error_trace}"
 @tool