SolshineMisfit committed on
Commit
894372d
·
verified ·
1 Parent(s): d8d214d

reverted dataset save funcs to last version

Browse files
Files changed (1) hide show
  1. app.py +29 -42
app.py CHANGED
@@ -79,47 +79,56 @@ def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
79
 
80
  print(f"Creating dataset: {repo_id}")
81
 
82
- # Create a simple dataset from a dictionary with a train split
83
  data = {
84
  "text": [conversation_data],
85
  "timestamp": [datetime.datetime.now().isoformat()],
86
  "id": [str(uuid.uuid4())]
87
  }
88
 
89
- # Explicitly ensure the repository exists
90
- try:
91
- hf_api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True)
92
- print(f"Repository {repo_id} created or already exists")
93
- except Exception as repo_error:
94
- print(f"Note about repository creation: {str(repo_error)}")
95
-
96
- # Create the dataset and push to hub
97
  dataset = Dataset.from_dict(data)
98
 
99
- # Push to Hugging Face Hub using the standard method
100
  dataset.push_to_hub(
101
- repo_id=repo_id,
102
- token=api_key,
103
- private=False
104
  )
105
 
106
  # Generate the URL for the dataset
107
  dataset_url = f"https://huggingface.co/datasets/{repo_id}"
108
  print(f"Dataset successfully pushed to: {dataset_url}")
109
 
110
- # Verify dataset exists by checking the API
111
- verify_result = verify_dataset_exists(repo_id)
112
- if verify_result["exists"]:
113
- return f"Successfully created dataset at {dataset_url}"
114
- else:
115
- return f"Dataset was uploaded, but it may take a few minutes to appear at {dataset_url}. Error: {verify_result['message']}"
116
-
117
  except Exception as e:
118
  import traceback
119
  error_trace = traceback.format_exc()
120
  print(f"Dataset creation error: {str(e)}\n{error_trace}")
121
  return f"Error creating dataset: {str(e)}\n\nTroubleshooting tips:\n1. Verify your HF_API_KEY is valid\n2. Try a simpler dataset name with only letters and underscores\n3. Check your permissions for the Misfits-and-Machines organization"
122
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  def verify_dataset_exists(repo_id: str) -> dict:
124
  """Verify that a dataset exists and is valid on the Hugging Face Hub.
125
 
@@ -147,28 +156,6 @@ def verify_dataset_exists(repo_id: str) -> dict:
147
  except Exception as e:
148
  return {"exists": False, "message": f"Error verifying dataset: {str(e)}"}
149
 
150
- @tool
151
- def Dataset_Creator_Tool(dataset_name: str, conversation_data: str) -> str:
152
- """A tool that posts a new dataset of the current conversation to Hugging Face.
153
-
154
- Args:
155
- dataset_name: Name for the dataset (will be prefixed with 'Misfits-and-Machines/')
156
- conversation_data: String content to save to the dataset
157
-
158
- Returns:
159
- Link to the created dataset or error message with troubleshooting steps
160
- """
161
- try:
162
- print(f"Creating dataset '{dataset_name}' with {len(conversation_data)} characters of data")
163
- print(f"Dataset will be created at Misfits-and-Machines/{dataset_name.replace(' ', '_').lower()}")
164
- result = Dataset_Creator_Function(dataset_name, conversation_data)
165
- print(f"Dataset creation result: {result}")
166
- return result
167
- except Exception as e:
168
- import traceback
169
- error_trace = traceback.format_exc()
170
- return f"Error using Dataset Creator tool: {str(e)}\n{error_trace}"
171
-
172
  @tool
173
  def Check_Dataset_Validity(dataset_name: str) -> str:
174
  """A tool that checks if a dataset exists and is valid on Hugging Face.
 
79
 
80
  print(f"Creating dataset: {repo_id}")
81
 
82
+ # Create a simple dataset from a dictionary
83
  data = {
84
  "text": [conversation_data],
85
  "timestamp": [datetime.datetime.now().isoformat()],
86
  "id": [str(uuid.uuid4())]
87
  }
88
 
89
+ # Create the dataset directly
 
 
 
 
 
 
 
90
  dataset = Dataset.from_dict(data)
91
 
92
+ # Push to Hugging Face Hub using the simpler method from documentation
93
  dataset.push_to_hub(
94
+ repo_id=repo_id, # Include username in repo_id
95
+ token=api_key, # Pass token explicitly
96
+ private=False # Make it public
97
  )
98
 
99
  # Generate the URL for the dataset
100
  dataset_url = f"https://huggingface.co/datasets/{repo_id}"
101
  print(f"Dataset successfully pushed to: {dataset_url}")
102
 
103
+ return f"Successfully created dataset at {dataset_url}"
 
 
 
 
 
 
104
  except Exception as e:
105
  import traceback
106
  error_trace = traceback.format_exc()
107
  print(f"Dataset creation error: {str(e)}\n{error_trace}")
108
  return f"Error creating dataset: {str(e)}\n\nTroubleshooting tips:\n1. Verify your HF_API_KEY is valid\n2. Try a simpler dataset name with only letters and underscores\n3. Check your permissions for the Misfits-and-Machines organization"
109
 
110
+ @tool
111
+ def Dataset_Creator_Tool(dataset_name: str, conversation_data: str) -> str:
112
+ """A tool that posts a new dataset of the current conversation to Hugging Face.
113
+
114
+ Args:
115
+ dataset_name: Name for the dataset (will be prefixed with 'Misfits-and-Machines/')
116
+ conversation_data: String content to save to the dataset
117
+
118
+ Returns:
119
+ Link to the created dataset or error message with troubleshooting steps
120
+ """
121
+ try:
122
+ print(f"Creating dataset '{dataset_name}' with {len(conversation_data)} characters of data")
123
+ print(f"Dataset will be created at Misfits-and-Machines/{dataset_name.replace(' ', '_').lower()}")
124
+ result = Dataset_Creator_Function(dataset_name, conversation_data)
125
+ print(f"Dataset creation result: {result}")
126
+ return result
127
+ except Exception as e:
128
+ import traceback
129
+ error_trace = traceback.format_exc()
130
+ return f"Error using Dataset Creator tool: {str(e)}\n{error_trace}"
131
+
132
  def verify_dataset_exists(repo_id: str) -> dict:
133
  """Verify that a dataset exists and is valid on the Hugging Face Hub.
134
 
 
156
  except Exception as e:
157
  return {"exists": False, "message": f"Error verifying dataset: {str(e)}"}
158
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  @tool
160
  def Check_Dataset_Validity(dataset_name: str) -> str:
161
  """A tool that checks if a dataset exists and is valid on Hugging Face.