Spaces:
Runtime error
Runtime error
| import json | |
| import os | |
| import re | |
| from datetime import datetime | |
| from pathlib import Path | |
| import gradio as gr | |
| from dotenv import load_dotenv | |
| from huggingface_hub import CommitScheduler, HfApi | |
| from huggingface_hub.utils import HfHubHTTPError | |
# Load variables from a local .env file (if one is present) into the process
# environment before the token is read below.
load_dotenv()
HF_TOKEN = os.environ.get("HF_TOKEN")

# Local folder that holds the usage log; created at import time so the
# scheduler and save_json() can rely on it existing.
JSON_DATASET_DIR = Path("dataset")
JSON_DATASET_DIR.mkdir(exist_ok=True, parents=True)
JSON_DATASET_PATH = JSON_DATASET_DIR / "dataset.jsonl"

# Scheduler that pushes the contents of JSON_DATASET_DIR to the usage-stats
# dataset repo. Its `lock` is what save_json() holds while appending.
# NOTE(review): `path_in_repo` receives the path of the JSONL *file*
# ("dataset/dataset.jsonl"), not a directory -- confirm this is the intended
# layout in the dataset repo before changing it.
scheduler = CommitScheduler(
    token=HF_TOKEN,
    repo_type="dataset",
    repo_id="librarian-bots/collection_cloner-usage-stats",
    folder_path=JSON_DATASET_DIR,
    path_in_repo=str(JSON_DATASET_PATH),
)
def save_json(source_slug: str, destination_slug: str) -> None:
    """Append one clone record to the local usage-stats JSONL file.

    The record holds the source slug, the destination slug and the current
    local time in ISO format, written as a single JSON line to
    ``JSON_DATASET_PATH``.

    Args:
        source_slug: Slug of the collection that was cloned.
        destination_slug: Slug of the newly created collection.

    Returns:
        None. Nothing is written when either slug starts with ``"hf_"``.
    """
    # Catch people accidentally pasting an access token into a slug field:
    # never persist anything that looks like one. This is checked *before*
    # taking the lock or opening the file so a rejected call has no side
    # effects (the append-mode open would otherwise create/touch the file).
    if source_slug.startswith("hf_") or destination_slug.startswith("hf_"):
        return None

    record = {
        "source_collection": source_slug,
        "destination_collection": destination_slug,
        "datetime": datetime.now().isoformat(),
    }
    # Hold the scheduler's lock while appending so the write does not overlap
    # with whatever else uses that lock on the dataset folder.
    with scheduler.lock:
        with JSON_DATASET_PATH.open("a", encoding="utf-8") as f:
            # One write per record keeps each JSON line intact.
            f.write(json.dumps(record) + "\n")
def extract_slug(url):
    """Return the part of a Hub collection URL after ``/collections/``.

    The URL is searched (not anchored), and everything following
    ``https://huggingface.co/collections/`` up to the end of the line is
    returned. ``None`` is returned when the URL prefix is not found.
    """
    found = re.search(r"https://huggingface\.co/collections/(.*)", url)
    if found is None:
        return None
    return found.group(1)
def clone_collection(
    source_slug, dest_title, token, dest_namespace=None, private=False, exist_ok=False
):
    """Clone a Hub collection into a new collection and report the result.

    Args:
        source_slug: Slug or full ``https://huggingface.co/collections/...``
            URL of the collection to copy.
        dest_title: Title for the new collection.
        token: Hub access token used for every API call.
        dest_namespace: Namespace to create the collection under. ``None``,
            a blank value, or the UI placeholder ``"username"`` all mean
            "let the API pick the default namespace".
        private: Whether the new collection is private. Private clones are
            not recorded in the usage stats.
        exist_ok: Forwarded to ``create_collection`` as ``exists_ok``.

    Returns:
        A Markdown string linking the source and the new collection.

    Raises:
        gr.Error: If no source is given, or the source collection cannot be
            fetched with the supplied token.
    """
    api = HfApi(token=token)

    source_slug = (source_slug or "").strip()
    # Accept a full collection URL as well as a bare slug.
    if source_slug.startswith("https://huggingface.co/collections/"):
        source_slug = extract_slug(source_slug)
    if not source_slug:
        raise gr.Error("Please provide the slug or URL of the collection to clone.")

    # get_collection signals a missing/inaccessible collection by raising an
    # HTTP error rather than returning a falsy value, so translate that into
    # the user-facing error here.
    try:
        collection = api.get_collection(source_slug)
    except HfHubHTTPError as err:
        raise gr.Error(
            f"Collection {source_slug} does not exist or you do not have access to it."
        ) from err

    description = f"Copied from {collection.title} using https://huggingface.co/spaces/librarian-bots/collection_cloner."

    # "username" is the textbox's placeholder default; treat it (and an
    # emptied textbox) as "no namespace given".
    dest_namespace = (dest_namespace or "").strip()
    if not dest_namespace or dest_namespace == "username":
        dest_namespace = None

    new_collection = api.create_collection(
        dest_title,
        namespace=dest_namespace,
        exists_ok=exist_ok,
        private=private,
        description=description,
        token=token,
    )

    for item in collection.items:
        try:
            api.add_collection_item(
                new_collection.slug, item.item_id, item_type=item.item_type
            )
        except HfHubHTTPError as err:
            # Best effort: keep copying the remaining items, but tell the
            # user what actually went wrong. Only a 409 means "duplicate".
            status = getattr(getattr(err, "response", None), "status_code", None)
            if status == 409:
                gr.Info(
                    f"Failed to add item {item.item_id} to collection {new_collection.slug} because it already exists in this collection."
                )
            else:
                gr.Info(
                    f"Failed to add item {item.item_id} to collection {new_collection.slug}: {err}"
                )

    # Only clones into public collections are tracked.
    if not private:
        save_json(collection.slug, new_collection.slug)
    return f"[Collection]({collection.url}) has been cloned into [{new_collection.slug}]({new_collection.url})"
# Centered page heading rendered at the top of the app.
title = "<h1 style='text-align: center;'> 🧬 Collection Cloner 🧬</h1>"
# Gradio UI: intro text, token input, source/destination fields, options and
# a button wired to clone_collection(). The result is shown as Markdown.
# NOTE(review): the container nesting below was reconstructed from a
# flattened copy of the file -- confirm it matches the intended layout.
with gr.Blocks(css="style.css") as demo:
    gr.HTML(title)
    # The first paragraph is now properly closed with </p> (it previously
    # ended with a second opening <p>).
    gr.HTML(
        "<p style='text-align: center;'>\n"
        'This space allows you to clone a <a href="https://huggingface.co/docs/hub/collections">Collection</a> '
        "from the Hugging Face Hub into your own namespace.</p>\n"
        "<p style='text-align: center;'> You can edit this cloned Collection to your liking!</p>"
    )
    gr.Markdown(
        "\n**Note**: To track interest in this feature this Space keeps a record of clones which are cloned into public collection. Clones into private Collections are not tracked."
    )

    gr.Markdown("## Authentication")
    gr.Markdown(
        "Token is required to create a new collection and clone private collections. You can get your token from your [profile page](https://huggingface.co/settings/token)."
    )
    with gr.Row():
        token = gr.Textbox(
            label="Token",
            type="password",
        )

    with gr.Column():
        gr.Markdown("## Source Collection")
        source_slug = gr.Textbox(
            label="Source Collection slug or URL",
            placeholder="e.g. username/collection-slug",
        )
        gr.Markdown("## Destination Collection info")
        dest_title = gr.Textbox(
            label="Destination Title",
        )
        # The default value "username" is a sentinel that clone_collection()
        # maps to "no namespace given".
        dest_namespace = gr.Textbox(
            value="username",
            label="Destination Namespace (optional - defaults to your username)",
            interactive=True,
        )
        with gr.Row():
            private = gr.Checkbox(
                False,
                label="Make new collection private?",
            )
            overwrite = gr.Checkbox(
                False,
                label="Overwrite any collection with same slug as the destination?",
            )

    submit_btn = gr.Button("Clone Collection")
    response = gr.Markdown()
    # Input order must match clone_collection's positional parameters:
    # (source_slug, dest_title, token, dest_namespace, private, exist_ok).
    submit_btn.click(
        clone_collection,
        [
            source_slug,
            dest_title,
            token,
            dest_namespace,
            private,
            overwrite,
        ],
        response,
    )

demo.launch()