SolshineMisfit committed on
Commit
b3b6478
·
verified ·
1 Parent(s): 0be1b82

Put more under the control of the agent

Browse files
Files changed (1) hide show
  1. app.py +63 -52
app.py CHANGED
@@ -51,83 +51,94 @@ def Sonar_Web_Search_Tool(arg1: str, arg2: str) -> str:
51
  return f"Error using Sonar Websearch tool '{arg1} {arg2}': {str(e)}"
52
 
53
 
54
- def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
55
  """Creates and pushes a dataset to Hugging Face with the conversation history.
56
 
57
  Args:
58
- dataset_name: Name for the dataset (will be prefixed with username)
 
59
  conversation_data: String representing the conversation data
60
 
61
  Returns:
62
  URL of the created dataset or error message
63
  """
64
  try:
 
 
 
 
 
65
  # Get API key from environment variables
66
- api_key = os.getenv("HF_API_KEY") or os.getenv("HUGGINGFACE_API_KEY", "")
67
  if not api_key:
68
  return "Error: No Hugging Face API key found in environment variables. Please set HF_API_KEY or HUGGINGFACE_API_KEY."
69
-
70
- # Force the username to be the known value
71
- username = "Misfits-and-Machines"
72
 
73
  # Initialize Hugging Face API
74
  hf_api = HfApi(token=api_key)
75
 
76
- # Sanitize dataset name - use underscores instead of dashes for better compatibility
77
  safe_dataset_name = dataset_name.replace(" ", "_").lower()
78
  repo_id = f"{username}/{safe_dataset_name}"
79
 
80
  print(f"Creating dataset repository: {repo_id}")
81
 
82
- # Prepare dataset with appropriate structure
83
- # First, ensure we have a proper train split with necessary fields
84
- dataset_dict = {
85
- "text": [conversation_data],
86
- "timestamp": [datetime.datetime.now().isoformat()],
87
- "dataset_id": [str(uuid.uuid4())]
88
- }
89
-
90
- # Create a Hugging Face dataset
91
- dataset = Dataset.from_dict(dataset_dict)
92
-
93
- # Standard practice is to have a train split for datasets
94
- dataset_dict = {"train": dataset}
95
-
96
- # Check if the repository already exists
97
- try:
98
- repo_exists = hf_api.repo_exists(repo_id=repo_id, repo_type="dataset")
99
- if repo_exists:
100
- print(f"Repository {repo_id} already exists")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  else:
102
- # Create repo if it doesn't exist
103
- hf_api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True)
104
- print(f"Repository {repo_id} created successfully")
105
- except Exception as repo_error:
106
- print(f"Repository check/creation error: {str(repo_error)}")
107
- # Continue anyway as push_to_hub might create the repo
108
-
109
- # Push dataset to the Hub with appropriate parameters
110
- print(f"Pushing dataset to {repo_id}")
111
-
112
- # Create URL for monitoring - we'll show this to the user so they can check progress
113
- dataset_url = f"https://huggingface.co/datasets/{repo_id}"
114
- print(f"Dataset URL will be: {dataset_url}")
115
-
116
- # Push with careful parameter selection
117
- dataset.push_to_hub(
118
- repo_id=repo_id,
119
- token=api_key,
120
- split="train", # Use a proper split name
121
- commit_message=f"Upload dataset: {dataset_name}"
122
- )
123
-
124
- print(f"Dataset successfully pushed to: {dataset_url}")
125
- return f"Successfully created dataset at {dataset_url} - please check this URL to verify your dataset is visible"
126
  except Exception as e:
127
  import traceback
128
  error_trace = traceback.format_exc()
129
  print(f"Dataset creation error: {str(e)}\n{error_trace}")
130
- return f"Error creating dataset: {str(e)}\n\nTo troubleshoot:\n1. Verify API key is valid\n2. Try with a different dataset name\n3. Check if you have write permissions for the Misfits-and-Machines organization"
131
 
132
  @tool
133
  def Dataset_Creator_Tool(dataset_name: str, conversation_data: str) -> str:
@@ -149,7 +160,7 @@ def Dataset_Creator_Tool(dataset_name: str, conversation_data: str) -> str:
149
  except Exception as e:
150
  import traceback
151
  error_trace = traceback.format_exc()
152
- return f"Error using Dataset Creator tool: {str(e)}\n{error_trace}\n\nPlease try with a simpler dataset name using only letters, numbers and underscores."
153
 
154
 
155
  @tool
 
51
  return f"Error using Sonar Websearch tool '{arg1} {arg2}': {str(e)}"
52
 
53
 
54
+ def Dataset_Creator_Function(dataset_name: str, username: str, conversation_data: str) -> str:
55
  """Creates and pushes a dataset to Hugging Face with the conversation history.
56
 
57
  Args:
58
+ dataset_name: Name for the dataset
59
+ username: Default is "Misfits-and-Machines"
60
  conversation_data: String representing the conversation data
61
 
62
  Returns:
63
  URL of the created dataset or error message
64
  """
65
  try:
66
+ import tempfile
67
+ import pathlib
68
+ from datasets import Dataset, DatasetDict
69
+ import pandas as pd
70
+
71
  # Get API key from environment variables
72
+ api_key = os.getenv("HF_API_KEY")
73
  if not api_key:
74
  return "Error: No Hugging Face API key found in environment variables. Please set HF_API_KEY or HUGGINGFACE_API_KEY."
 
 
 
75
 
76
  # Initialize Hugging Face API
77
  hf_api = HfApi(token=api_key)
78
 
79
+ # Sanitize dataset name
80
  safe_dataset_name = dataset_name.replace(" ", "_").lower()
81
  repo_id = f"{username}/{safe_dataset_name}"
82
 
83
  print(f"Creating dataset repository: {repo_id}")
84
 
85
+ # Create a temporary directory to store the dataset files
86
+ with tempfile.TemporaryDirectory() as tmp_dir:
87
+ # Convert data to DataFrame and save as CSV
88
+ df = pd.DataFrame({
89
+ "text": [conversation_data],
90
+ "timestamp": [datetime.datetime.now().isoformat()],
91
+ "dataset_id": [str(uuid.uuid4())]
92
+ })
93
+
94
+ # Save CSV in the temp directory
95
+ csv_path = pathlib.Path(tmp_dir) / "train.csv"
96
+ df.to_csv(csv_path, index=False)
97
+
98
+ print(f"Data saved to temporary CSV file: {csv_path}")
99
+
100
+ # Load from CSV to ensure proper dataset structure
101
+ train_dataset = Dataset.from_pandas(df)
102
+
103
+ # Create a DatasetDict with a train split
104
+ dataset_dict = DatasetDict({"train": train_dataset})
105
+ print(f"Created dataset with {len(train_dataset)} rows")
106
+
107
+ # Create the repository explicitly if it doesn't exist
108
+ try:
109
+ if not hf_api.repo_exists(repo_id=repo_id, repo_type="dataset"):
110
+ hf_api.create_repo(repo_id=repo_id, repo_type="dataset")
111
+ print(f"Repository {repo_id} created")
112
+ else:
113
+ print(f"Repository {repo_id} already exists")
114
+ except Exception as repo_error:
115
+ print(f"Repository creation error: {str(repo_error)}")
116
+
117
+ # Push to Hugging Face Hub
118
+ print(f"Pushing dataset to {repo_id}")
119
+
120
+ # Use the DatasetDict push_to_hub method
121
+ dataset_dict.push_to_hub(
122
+ repo_id=repo_id,
123
+ token=api_key,
124
+ private=False
125
+ )
126
+
127
+ dataset_url = f"https://huggingface.co/datasets/{repo_id}"
128
+ print(f"Dataset successfully pushed to: {dataset_url}")
129
+
130
+ # Double-check that the repo exists
131
+ if hf_api.repo_exists(repo_id=repo_id, repo_type="dataset"):
132
+ print(f"Verified: Repository {repo_id} exists")
133
  else:
134
+ print(f"Warning: Repository {repo_id} not found after push")
135
+
136
+ return f"Successfully created dataset at {dataset_url}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  except Exception as e:
138
  import traceback
139
  error_trace = traceback.format_exc()
140
  print(f"Dataset creation error: {str(e)}\n{error_trace}")
141
+ return f"Error creating dataset: {str(e)}\n\nTo troubleshoot:\n1. Verify API key is valid\n2. Try with a different dataset name"
142
 
143
  @tool
144
  def Dataset_Creator_Tool(dataset_name: str, conversation_data: str) -> str:
 
160
  except Exception as e:
161
  import traceback
162
  error_trace = traceback.format_exc()
163
+ return f"Error using Dataset Creator tool: {str(e)}\n{error_trace}"
164
 
165
 
166
  @tool