# materialcharacterize / dataset_utils.py
# qurashiubaid's picture
# Upload 7 files
# 638afcf verified
import os
import json
import uuid
from huggingface_hub import HfApi
from huggingface_hub.utils import HfHubHTTPError
def contribute_to_dataset(results, sample_name, repo_id, token=None):
    """
    Upload one result set as a JSON entry to a Hugging Face dataset repo.

    The entry is stored at ``entries/<uuid4>.json`` inside ``repo_id`` and
    contains a random ``id``, the ``sample_name`` exactly as given (it is NOT
    scrubbed or hashed), the list of result keys under ``modalities`` and the
    result values under ``results``. A ``"sample_name"`` key inside
    ``results`` is dropped from both ``modalities`` and ``results``.

    This function never raises; every failure is reported through the
    returned tuple.

    Args:
        results: Mapping of result name -> JSON-serializable value.
        sample_name: Label stored in the entry and used in the commit message.
        repo_id: Target dataset repository id, passed to ``upload_file``.
        token: Optional access token passed to ``HfApi``.

    Returns:
        ``(True, message)`` on success, ``(False, message)`` on any failure.
    """
    # Stage 1: build and serialize the entry fully in memory. Doing this
    # before touching the Hub means bad input (non-mapping ``results``,
    # values json cannot encode) is rejected without creating a client and
    # without leaving a half-written file on disk.
    try:
        payload = {k: v for k, v in results.items() if k != "sample_name"}
        entry_id = str(uuid.uuid4())
        entry = {
            "id": entry_id,
            "sample_name": sample_name,
            "modalities": list(payload),
            "results": payload,
        }
        # json.dumps emits ASCII by default, so the encoded bytes are the
        # same content the file-based version would have written.
        body = json.dumps(entry).encode("utf-8")
    except Exception as e:
        return False, f"Unexpected error: {str(e)}"

    # Stage 2: upload the serialized bytes directly. ``upload_file`` accepts
    # raw bytes, so no temporary file is created that would need cleanup.
    try:
        api = HfApi(token=token)
        api.upload_file(
            path_or_fileobj=body,
            path_in_repo=f"entries/{entry_id}.json",
            repo_id=repo_id,
            repo_type="dataset",
            commit_message=f"Add sample: {sample_name}",
        )
    except HfHubHTTPError as e:
        # Prefer the real HTTP status over searching the message text: the
        # message can contain "401" by coincidence (e.g. inside a repo id),
        # which would misreport other failures as auth errors.
        response = getattr(e, "response", None)
        status = getattr(response, "status_code", None)
        # Fall back to the substring check only when no status is available.
        if status == 401 or (status is None and "401" in str(e)):
            return False, "Authentication required to contribute to dataset."
        return False, f"Dataset contribution failed: {str(e)}"
    except Exception as e:
        return False, f"Unexpected error: {str(e)}"

    return True, "Successfully contributed to dataset!"