# SendtoHFdataset / app.py
# Author: ChrisSacrumCor
# Commit: "Create app.py" (512be5b, verified)
import gradio as gr
import pandas as pd
from datasets import Dataset, DatasetDict
from huggingface_hub import notebook_login, HfApi
def process_and_upload(json_file_path, hf_username, dataset_name):
    """Load a JSON file into a dataset and push it to the Hugging Face Hub.

    The JSON is read with pandas, wrapped in a ``DatasetDict`` holding a
    single ``"train"`` split, and pushed to the public dataset repository
    ``<hf_username>/<dataset_name>``, which is created first if missing.

    Args:
        json_file_path: The uploaded file as handed over by the UI. Either a
            path (``str`` / ``os.PathLike``) or an object whose ``.name``
            attribute holds the path. ``None`` means no file was selected.
        hf_username: Namespace used as the first half of the repo id.
        dataset_name: Repository name used as the second half of the repo id.

    Returns:
        A human-readable status string: a success message after the push, or
        a short prompt when a required input is missing.

    Raises:
        Exception: Errors from reading the JSON or from the Hub calls are not
            caught here and propagate to the caller.
    """
    import os  # local import: only needed for the path-type check below

    # Validate inputs up front so the user gets a clear message instead of an
    # AttributeError (no file) or a malformed repo id (blank names).
    if json_file_path is None:
        return "Please select a JSON file to upload."
    hf_username = (hf_username or "").strip()
    dataset_name = (dataset_name or "").strip()
    if not hf_username or not dataset_name:
        return "Please provide both a Hugging Face username and a dataset name."

    # The file may arrive as a plain path or as a file wrapper exposing the
    # path via `.name`. Check for path types first: `Path.name` would only
    # give the final component.
    if isinstance(json_file_path, (str, os.PathLike)):
        json_path = json_file_path
    else:
        json_path = json_file_path.name

    # Load the combined JSON file.
    df = pd.read_json(json_path)
    print(f"DataFrame shape: {df.shape}")
    print(f"DataFrame columns: {df.columns.tolist()}")

    # Create the Hugging Face Dataset and wrap it in a 'train' split.
    dataset = Dataset.from_pandas(df)
    print("Dataset created successfully!")
    dataset_dict = DatasetDict({"train": dataset})

    # Best-effort interactive login. A failure here is not fatal: the user
    # may already be logged in or have a token configured. Catch Exception
    # (not a bare except) so KeyboardInterrupt/SystemExit still propagate.
    try:
        notebook_login()
    except Exception as exc:
        print(f"Interactive login skipped ({exc!r}); using existing credentials.")

    # Push to the Hugging Face Hub.
    repo_id = f"{hf_username}/{dataset_name}"
    api = HfApi()
    api.create_repo(repo_id=repo_id, repo_type="dataset", private=False, exist_ok=True)
    dataset_dict.push_to_hub(repo_id, private=False)
    return f"Dataset '{dataset_name}' uploaded to Hugging Face Hub under user '{hf_username}'"
# Single-page UI: a file picker plus two text fields feed process_and_upload,
# and the returned status string is shown in a label.
with gr.Blocks() as demo:
    gr.Markdown("## Upload and Process JSON to Hugging Face Dataset")

    # The three inputs sit side by side in one row.
    with gr.Row():
        file_picker = gr.File(label="Select JSON file")
        username_box = gr.Textbox(
            label="Hugging Face Username",
            placeholder="Your HF username",
        )
        dataset_box = gr.Textbox(
            label="Dataset Name",
            placeholder="Name for your dataset",
        )

    upload_button = gr.Button("Upload to Hugging Face")
    status_label = gr.Label(label="Output")

    # Input order must match the parameter order of process_and_upload.
    upload_inputs = [file_picker, username_box, dataset_box]
    upload_button.click(
        fn=process_and_upload,
        inputs=upload_inputs,
        outputs=status_label,
    )

# Start the Gradio server.
demo.launch()