"""Gradio app that turns an uploaded JSON file into a Hugging Face dataset.

The UI collects a JSON file, a Hugging Face username and a dataset name, loads
the JSON into a pandas DataFrame, wraps it in a ``DatasetDict`` with a single
``train`` split and pushes it to the Hugging Face Hub as a public dataset.
"""

import gradio as gr
import pandas as pd
from datasets import Dataset, DatasetDict
from huggingface_hub import HfApi, notebook_login


def process_and_upload(json_file_path, hf_username, dataset_name):
    """Load a JSON file and push it to the Hugging Face Hub as a dataset.

    Args:
        json_file_path: The value produced by the ``gr.File`` input. Either a
            filesystem path (``str``) or a file-like object exposing the path
            through its ``name`` attribute.
        hf_username: Hub user (or organisation) that will own the dataset.
        dataset_name: Name of the dataset repository to create or update.

    Returns:
        A human-readable confirmation message shown in the output label.

    Raises:
        gr.Error: If no file was provided, or if the username or dataset name
            is empty after stripping whitespace.

    Errors raised while parsing the JSON or while talking to the Hub are not
    caught here and propagate to the caller.
    """
    if json_file_path is None:
        raise gr.Error("Please select a JSON file to upload.")

    hf_username = (hf_username or "").strip()
    dataset_name = (dataset_name or "").strip()
    if not hf_username:
        raise gr.Error("Please enter your Hugging Face username.")
    if not dataset_name:
        raise gr.Error("Please enter a name for the dataset.")

    # gr.File hands over either a plain path string or a tempfile-like wrapper
    # (path stored in ``.name``), depending on the Gradio version/`type` option.
    source_path = getattr(json_file_path, "name", json_file_path)

    # Load your combined JSON file
    df = pd.read_json(source_path)
    print(f"DataFrame shape: {df.shape}")
    print(f"DataFrame columns: {df.columns.tolist()}")

    # Create HuggingFace Dataset
    dataset = Dataset.from_pandas(df)
    print("Dataset created successfully!")

    # Create a DatasetDict with a 'train' split
    dataset_dict = DatasetDict({"train": dataset})

    # Log in to Hugging Face (or use existing token if available).
    # This is best-effort: if the interactive login cannot run, carry on and
    # rely on a token that is already configured. Only ``Exception`` is caught
    # so that KeyboardInterrupt/SystemExit are not swallowed.
    try:
        notebook_login()
    except Exception as exc:
        print(f"Interactive login skipped ({exc!r}); using existing credentials.")

    # Push to the Hugging Face Hub
    repo_id = f"{hf_username}/{dataset_name}"
    api = HfApi()
    api.create_repo(
        repo_id=repo_id,
        repo_type="dataset",
        private=False,
        exist_ok=True,
    )
    dataset_dict.push_to_hub(repo_id, private=False)

    return f"Dataset '{dataset_name}' uploaded to Hugging Face Hub under user '{hf_username}'"


with gr.Blocks() as demo:
    gr.Markdown("## Upload and Process JSON to Hugging Face Dataset")

    with gr.Row():
        json_file_input = gr.File(label="Select JSON file")
        hf_username_input = gr.Textbox(
            label="Hugging Face Username",
            placeholder="Your HF username",
        )
        dataset_name_input = gr.Textbox(
            label="Dataset Name",
            placeholder="Name for your dataset",
        )

    submit_button = gr.Button("Upload to Hugging Face")
    output_label = gr.Label(label="Output")

    submit_button.click(
        process_and_upload,
        inputs=[json_file_input, hf_username_input, dataset_name_input],
        outputs=output_label,
    )

# Only start the server when executed as a script, so importing this module
# (e.g. to reuse process_and_upload) does not launch the UI.
if __name__ == "__main__":
    demo.launch()