"""Upload generated certificates to the public Build Small gallery dataset.""" import os import time import tempfile import logging from PIL import Image as PILImage from datasets import Dataset, Image, load_dataset, concatenate_datasets from huggingface_hub import HfApi logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) # PRIVATE dataset (restricted to the org "admins" resource group) — the archive of issued certificates CERTIFICATE_DATASET_NAME = "build-small-hackathon/build-small-certificates" HF_TOKEN = os.getenv("HF_TOKEN") def get_certificate_image_path(hf_username): """Return a local PNG path of this user's existing certificate, or None if they have none.""" if not hf_username or not hf_username.strip(): return None uname = hf_username.strip().lower() try: ds = load_dataset(CERTIFICATE_DATASET_NAME, split="train", token=HF_TOKEN) except Exception: return None try: labels = [str(x).strip().lower() for x in ds["label"]] # no image decode if uname not in labels: return None img = ds[labels.index(uname)]["image"] # decode only the match out = os.path.join(tempfile.gettempdir(), f"existing_cert_{uname}.png") img.save(out, "PNG") return out except Exception as e: logger.warning(f"get_certificate_image_path failed: {e}") return None def safe_add_certificate_to_dataset(certificate_image, hf_username, overwrite=False, max_retries=5, retry_delay=3): """Add a certificate image to the dataset. With overwrite=True, replace any existing row for this username; otherwise refuse to duplicate.""" try: if not hf_username or not hf_username.strip(): return False, "❌ Error: HF username is required" if certificate_image is None: return False, "❌ Error: Certificate image is required" hf_username = hf_username.strip() logger.info(f"Processing certificate for user: {hf_username}") existing_dataset = None load_successful = False is_empty_dataset = False for attempt in range(max_retries): try: existing_dataset = load_dataset(CERTIFICATE_DATASET_NAME, split="train", token=HF_TOKEN) logger.info(f"Loaded {len(existing_dataset)} existing certificates") load_successful = True break except Exception as load_error: error_str = str(load_error).lower() err_name = type(load_error).__name__.lower() empty_signals = ( "emptydataset" in err_name or "corresponds to no data" in error_str or "doesn't contain any data" in error_str or "no data" in error_str or "doesn't exist" in error_str or "not found" in error_str ) if empty_signals: logger.info("Dataset empty / not yet created — will create first entry") is_empty_dataset = True load_successful = True existing_dataset = None break logger.warning(f"Attempt {attempt + 1} failed: {str(load_error)[:120]}") if attempt < max_retries - 1: time.sleep(retry_delay) if not load_successful: return False, ("❌ Certificate upload temporarily unavailable. Please try again in a few minutes.") # Dedup by username (stored in the 'label' column) if existing_dataset is not None: present = hf_username in existing_dataset["label"] if present and not overwrite: return True, f"a certificate for '{hf_username}' already exists in the gallery." if present and overwrite: existing_dataset = existing_dataset.filter( lambda ex: str(ex.get("label", "")).strip().lower() != hf_username.lower() ) with tempfile.TemporaryDirectory() as temp_dir: if isinstance(certificate_image, PILImage.Image): temp_image_path = os.path.join(temp_dir, f"certificate_{hf_username}_{int(time.time())}.png") certificate_image.save(temp_image_path, "PNG") elif isinstance(certificate_image, str) and os.path.exists(certificate_image): temp_image_path = certificate_image else: return False, "❌ Error: Invalid image format provided" new_dataset = Dataset.from_dict( {"image": [temp_image_path], "label": [hf_username]} ).cast_column("image", Image()) if existing_dataset is not None and not is_empty_dataset and len(existing_dataset) > 0: combined_dataset = concatenate_datasets([existing_dataset, new_dataset]) else: combined_dataset = new_dataset try: combined_dataset.push_to_hub(CERTIFICATE_DATASET_NAME, private=True, token=HF_TOKEN) logger.info(f"Saved certificate. Total now: {len(combined_dataset)}") return True, f"✅ saved to the gallery for {hf_username}." except Exception as upload_error: msg = str(upload_error).lower() if any(i in msg for i in ("rate limit", "429", "too many requests")): return False, "⏳ Upload busy due to high load — please try again in 10–15 minutes." logger.error(f"Upload failed: {upload_error}") return False, f"❌ Certificate upload failed: {str(upload_error)}" except Exception as e: logger.error(f"Unexpected error in certificate upload: {e}") return False, f"❌ Certificate upload failed: {str(e)}" def upload_user_certificate(certificate_image, hf_username, overwrite=False): """Public entry point — returns (success, message).""" if not certificate_image: return False, "❌ No certificate image provided" if not hf_username or not hf_username.strip(): return False, "❌ HF username is required" return safe_add_certificate_to_dataset(certificate_image, hf_username, overwrite=overwrite)