SolshineMisfit committed on
Commit
e95248e
·
verified ·
1 Parent(s): 140d4bf

Updated Dataset_Creator_Function

Browse files

This updated function:

- Checks for the API key under both environment variable names (HF_API_KEY and HUGGINGFACE_API_KEY)
- Falls back to the username "Misfits-and-Machines" if the whoami response contains no name; an authentication failure returns an error message instead
- Explicitly creates the repository before pushing data
- Sets create_repo=True and repo_type="dataset" in push_to_hub
- Adds more debug printing to help track issues
- Sanitizes the dataset name (replacing spaces with dashes and lowercasing it)
- Adds a commit message for better tracking

Files changed (1) hide show
  1. app.py +34 -7
app.py CHANGED
@@ -62,15 +62,26 @@ def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
62
  URL of the created dataset or error message
63
  """
64
  try:
 
 
 
 
 
65
  # Initialize Hugging Face API
66
- hf_api = HfApi(token=os.getenv("HUGGINGFACE_API_KEY", ""))
67
 
68
  # Get username for the repository name
69
- user_info = hf_api.whoami()
70
- username = user_info.get("name", "anonymous")
 
 
 
71
 
72
- # Format the full repo name
73
- repo_id = f"{username}/{dataset_name}"
 
 
 
74
 
75
  # Create a conversation object
76
  conversation = {
@@ -82,11 +93,27 @@ def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
82
  # Create a Hugging Face dataset
83
  dataset = Dataset.from_dict({"conversations": [conversation]})
84
 
85
- # Push to the Hugging Face Hub
86
- dataset.push_to_hub(repo_id, token=os.getenv("HUGGINGFACE_API_KEY", ""))
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
  return f"Successfully created dataset at https://huggingface.co/datasets/{repo_id}"
89
  except Exception as e:
 
 
 
90
  return f"Error creating dataset: {str(e)}"
91
 
92
  @tool
 
62
  URL of the created dataset or error message
63
  """
64
  try:
65
+ # Get API key from environment variables - try both possible names
66
+ api_key = os.getenv("HF_API_KEY") or os.getenv("HUGGINGFACE_API_KEY", "")
67
+ if not api_key:
68
+ return "Error: No Hugging Face API key found in environment variables"
69
+
70
  # Initialize Hugging Face API
71
+ hf_api = HfApi(token=api_key)
72
 
73
  # Get username for the repository name
74
+ try:
75
+ user_info = hf_api.whoami()
76
+ username = user_info.get("name", "Misfits-and-Machines") # Use the provided username as default
77
+ except Exception as e:
78
+ return f"Error authenticating with Hugging Face: {str(e)}"
79
 
80
+ # Format the full repo name with sanitized dataset name
81
+ safe_dataset_name = dataset_name.replace(" ", "-").lower()
82
+ repo_id = f"{username}/{safe_dataset_name}"
83
+
84
+ print(f"Creating dataset repository: {repo_id}")
85
 
86
  # Create a conversation object
87
  conversation = {
 
93
  # Create a Hugging Face dataset
94
  dataset = Dataset.from_dict({"conversations": [conversation]})
95
 
96
+ # First try to create the repository explicitly
97
+ try:
98
+ hf_api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True)
99
+ print(f"Repository {repo_id} created or already exists")
100
+ except Exception as repo_error:
101
+ print(f"Note: Repository creation returned: {str(repo_error)}")
102
+
103
+ # Push to the Hugging Face Hub with explicit parameters
104
+ dataset.push_to_hub(
105
+ repo_id=repo_id,
106
+ token=api_key,
107
+ create_repo=True, # Create the repo if it doesn't exist
108
+ repo_type="dataset", # Explicitly specify this is a dataset
109
+ commit_message="Add conversation dataset"
110
+ )
111
 
112
  return f"Successfully created dataset at https://huggingface.co/datasets/{repo_id}"
113
  except Exception as e:
114
+ import traceback
115
+ error_trace = traceback.format_exc()
116
+ print(f"Dataset creation error: {str(e)}\n{error_trace}")
117
  return f"Error creating dataset: {str(e)}"
118
 
119
  @tool