Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| from datasets import Dataset, DatasetDict | |
| from huggingface_hub import notebook_login, HfApi | |
def process_and_upload(json_file_path, hf_username, dataset_name):
    """Load a JSON file into a dataset and push it to the Hugging Face Hub.

    The file is read with pandas, wrapped in a ``DatasetDict`` holding a
    single ``train`` split, and pushed to the public dataset repo
    ``<hf_username>/<dataset_name>``, which is created if it does not
    already exist.

    Args:
        json_file_path: The uploaded JSON file, either as a path string or
            as an object that exposes the path through a ``name`` attribute.
        hf_username: Hub account name used as the repo namespace.
        dataset_name: Name of the dataset repo under that namespace.

    Returns:
        A confirmation message naming the dataset and the user.

    Raises:
        ValueError: If no file was supplied, or if the username or the
            dataset name is blank.
    """
    # Validate everything up front so a bad form submission fails with a
    # clear message instead of deep inside pandas or the Hub client.
    if json_file_path is None:
        raise ValueError("No JSON file was provided.")
    hf_username = (hf_username or "").strip()
    dataset_name = (dataset_name or "").strip()
    if not hf_username:
        raise ValueError("A Hugging Face username is required.")
    if not dataset_name:
        raise ValueError("A dataset name is required.")

    # NOTE(review): depending on the Gradio version/configuration the File
    # component is expected to deliver either a plain path string or a
    # temp-file wrapper whose path lives in ``.name`` -- accept both.
    if isinstance(json_file_path, str):
        source_path = json_file_path
    else:
        source_path = json_file_path.name

    # Load the combined JSON file.
    df = pd.read_json(source_path)
    print(f"DataFrame shape: {df.shape}")
    print(f"DataFrame columns: {df.columns.tolist()}")

    # Create the Hugging Face dataset and expose it as a 'train' split.
    dataset = Dataset.from_pandas(df)
    print("Dataset created successfully!")
    dataset_dict = DatasetDict({"train": dataset})

    # Interactive login is best-effort: when it is unavailable we fall back
    # to whatever token is already configured. Only ordinary errors are
    # tolerated, so KeyboardInterrupt/SystemExit still propagate.
    try:
        notebook_login()
    except Exception as exc:
        print(f"Interactive login skipped, relying on existing token: {exc}")

    # Push to the Hugging Face Hub (the repo id is built once and reused).
    repo_id = f"{hf_username}/{dataset_name}"
    api = HfApi()
    api.create_repo(
        repo_id=repo_id,
        repo_type="dataset",
        private=False,
        exist_ok=True,
    )
    dataset_dict.push_to_hub(repo_id, private=False)
    return f"Dataset '{dataset_name}' uploaded to Hugging Face Hub under user '{hf_username}'"
# Gradio front end: a file picker plus two text fields feed
# process_and_upload, and its status message is shown in a label.
with gr.Blocks() as demo:
    gr.Markdown("## Upload and Process JSON to Hugging Face Dataset")

    # The three inputs share one row.
    with gr.Row():
        json_file_input = gr.File(
            label="Select JSON file",
        )
        hf_username_input = gr.Textbox(
            label="Hugging Face Username",
            placeholder="Your HF username",
        )
        dataset_name_input = gr.Textbox(
            label="Dataset Name",
            placeholder="Name for your dataset",
        )

    submit_button = gr.Button("Upload to Hugging Face")
    output_label = gr.Label(label="Output")

    # Wire the button to the upload handler.
    submit_button.click(
        fn=process_and_upload,
        inputs=[
            json_file_input,
            hf_username_input,
            dataset_name_input,
        ],
        outputs=output_label,
    )

demo.launch()