certificate-generator / certificate_upload_module.py
ysharma's picture
ysharma HF Staff
Latest template; already-generated detection + recreate/replace flow
0eacc6f verified
Raw
History Blame Contribute Delete
6.36 kB
"""Upload generated certificates to the public Build Small gallery dataset."""
import os
import time
import tempfile
import logging
from PIL import Image as PILImage
from datasets import Dataset, Image, load_dataset, concatenate_datasets
from huggingface_hub import HfApi
# Configure the root logger when this module is imported so INFO-level messages are emitted.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# PRIVATE dataset (restricted to the org "admins" resource group) — the archive of issued certificates
CERTIFICATE_DATASET_NAME = "build-small-hackathon/build-small-certificates"
# Hub access token taken from the environment; this is None when HF_TOKEN is not set.
HF_TOKEN = os.getenv("HF_TOKEN")
def get_certificate_image_path(hf_username):
    """Return a local PNG path of this user's existing certificate, or None.

    The username is compared against the dataset's ``label`` column after
    stripping whitespace and lower-casing both sides.

    Args:
        hf_username: Hugging Face username to look up.

    Returns:
        Path of a PNG written to the system temp directory, or ``None`` when
        the username is blank, the dataset cannot be loaded, the user has no
        row, or the image cannot be read/saved. This function never raises
        for those cases; failures are logged instead.
    """
    if not hf_username or not hf_username.strip():
        return None
    uname = hf_username.strip().lower()

    try:
        ds = load_dataset(CERTIFICATE_DATASET_NAME, split="train", token=HF_TOKEN)
    except Exception as e:
        # Best-effort lookup: a missing or unreachable dataset means "no
        # certificate", but leave a trace so outages are diagnosable.
        logger.warning("get_certificate_image_path could not load dataset: %s", e)
        return None

    try:
        # Reading only the label column avoids decoding every image.
        labels = [str(x).strip().lower() for x in ds["label"]]
        try:
            row_index = labels.index(uname)
        except ValueError:
            return None
        img = ds[row_index]["image"]  # decode only the matching row
        # Keep the file name free of path separators or other odd characters.
        safe_name = "".join(
            ch if ch.isalnum() or ch in "-_." else "_" for ch in uname
        )
        out = os.path.join(tempfile.gettempdir(), f"existing_cert_{safe_name}.png")
        img.save(out, "PNG")
        return out
    except Exception as e:
        logger.warning("get_certificate_image_path failed: %s", e)
        return None
def _normalize_username(value):
    """Return the canonical (stripped, lower-cased) form used to compare usernames."""
    return str(value).strip().lower()


def _is_empty_dataset_error(load_error):
    """Tell whether a load failure signals "no data yet" rather than an outage."""
    error_str = str(load_error).lower()
    err_name = type(load_error).__name__.lower()
    # NOTE(review): "not found" / "doesn't exist" may also match access or
    # transient errors; treating those as "empty" means the next push contains
    # only the new row. Confirm this is acceptable for the target repo.
    return (
        "emptydataset" in err_name
        or "corresponds to no data" in error_str
        or "doesn't contain any data" in error_str
        or "no data" in error_str
        or "doesn't exist" in error_str
        or "not found" in error_str
    )


def _load_existing_certificates(max_retries, retry_delay):
    """Load the current dataset with retries.

    Returns ``(load_successful, dataset)``; ``dataset`` is ``None`` when the
    load succeeded but there is no data yet.
    """
    for attempt in range(max_retries):
        try:
            existing_dataset = load_dataset(
                CERTIFICATE_DATASET_NAME, split="train", token=HF_TOKEN
            )
        except Exception as load_error:
            if _is_empty_dataset_error(load_error):
                logger.info("Dataset empty / not yet created — will create first entry")
                return True, None
            logger.warning("Attempt %d failed: %s", attempt + 1, str(load_error)[:120])
            if attempt < max_retries - 1:
                time.sleep(retry_delay)
        else:
            logger.info("Loaded %d existing certificates", len(existing_dataset))
            return True, existing_dataset
    return False, None


def safe_add_certificate_to_dataset(certificate_image, hf_username, overwrite=False, max_retries=5, retry_delay=3):
    """Add a certificate image to the dataset.

    Usernames are matched against the ``label`` column case-insensitively and
    ignoring surrounding whitespace, the same way the lookup helper matches
    them, so "Alice" and "alice" count as the same user.

    Args:
        certificate_image: A PIL image, or a path to an existing image file.
        hf_username: Hugging Face username; stored (stripped) as the row label.
        overwrite: When True, replace any existing row(s) for this username;
            otherwise refuse to add a duplicate.
        max_retries: Maximum number of attempts to load the existing dataset.
        retry_delay: Seconds to sleep between load attempts.

    Returns:
        ``(success, message)``. ``success`` is also True when a certificate
        already exists and ``overwrite`` is False. The function does not raise;
        unexpected errors are logged and reported in the message.
    """
    try:
        if not hf_username or not hf_username.strip():
            return False, "❌ Error: HF username is required"
        if certificate_image is None:
            return False, "❌ Error: Certificate image is required"

        hf_username = hf_username.strip()
        canonical = _normalize_username(hf_username)
        logger.info("Processing certificate for user: %s", hf_username)

        load_successful, existing_dataset = _load_existing_certificates(max_retries, retry_delay)
        if not load_successful:
            return False, ("❌ Certificate upload temporarily unavailable. Please try again in a few minutes.")

        # Dedup by username (stored in the 'label' column). Reading only the
        # label column avoids decoding the images.
        if existing_dataset is not None:
            labels = [_normalize_username(x) for x in existing_dataset["label"]]
            if canonical in labels:
                if not overwrite:
                    return True, f"a certificate for '{hf_username}' already exists in the gallery."
                keep = [i for i, label in enumerate(labels) if label != canonical]
                # An empty remainder is represented as "no existing data".
                existing_dataset = existing_dataset.select(keep) if keep else None

        # The temp directory must stay alive until push_to_hub has run,
        # because the new row references the image file by path.
        with tempfile.TemporaryDirectory() as temp_dir:
            if isinstance(certificate_image, PILImage.Image):
                # Keep the file name free of path separators or odd characters.
                safe_name = "".join(
                    ch if ch.isalnum() or ch in "-_." else "_" for ch in hf_username
                )
                temp_image_path = os.path.join(
                    temp_dir, f"certificate_{safe_name}_{int(time.time())}.png"
                )
                certificate_image.save(temp_image_path, "PNG")
            elif isinstance(certificate_image, str) and os.path.exists(certificate_image):
                temp_image_path = certificate_image
            else:
                return False, "❌ Error: Invalid image format provided"

            new_dataset = Dataset.from_dict(
                {"image": [temp_image_path], "label": [hf_username]}
            ).cast_column("image", Image())

            if existing_dataset is not None and len(existing_dataset) > 0:
                combined_dataset = concatenate_datasets([existing_dataset, new_dataset])
            else:
                combined_dataset = new_dataset

            # NOTE(review): load -> modify -> push is not atomic; two concurrent
            # uploads could overwrite each other's row. Confirm whether callers
            # serialize uploads.
            try:
                combined_dataset.push_to_hub(CERTIFICATE_DATASET_NAME, private=True, token=HF_TOKEN)
            except Exception as upload_error:
                msg = str(upload_error).lower()
                if any(i in msg for i in ("rate limit", "429", "too many requests")):
                    return False, "⏳ Upload busy due to high load — please try again in 10–15 minutes."
                logger.error("Upload failed: %s", upload_error)
                return False, f"❌ Certificate upload failed: {str(upload_error)}"

            logger.info("Saved certificate. Total now: %d", len(combined_dataset))
            return True, f"✅ saved to the gallery for {hf_username}."
    except Exception as e:
        logger.exception("Unexpected error in certificate upload: %s", e)
        return False, f"❌ Certificate upload failed: {str(e)}"
def upload_user_certificate(certificate_image, hf_username, overwrite=False):
    """Validate the inputs, then delegate to safe_add_certificate_to_dataset.

    Returns a ``(success, message)`` tuple. A falsy image or a blank username
    is rejected here without touching the dataset.
    """
    rejection = None
    if not certificate_image:
        rejection = "❌ No certificate image provided"
    elif not (hf_username and hf_username.strip()):
        rejection = "❌ HF username is required"

    if rejection is not None:
        return False, rejection

    return safe_add_certificate_to_dataset(
        certificate_image,
        hf_username,
        overwrite=overwrite,
    )