ChrisSacrumCor committed on
Commit
512be5b
·
verified ·
1 Parent(s): 9ef2ab8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -0
app.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ from datasets import Dataset, DatasetDict
4
+ from huggingface_hub import notebook_login, HfApi
5
+
6
def _resolve_json_path(json_file):
    """Return the filesystem path behind a file-upload value.

    Accepts either a path string or an object exposing the path as ``.name``
    (for example a temporary-file wrapper).

    Raises:
        ValueError: If no file was supplied or no path can be determined.
    """
    if json_file is None:
        raise ValueError("No JSON file was provided.")
    # Fall back to the value itself when it has no ``.name`` attribute, so a
    # plain string path works as well as a file-like object.
    path = getattr(json_file, "name", json_file)
    if not path:
        raise ValueError("The uploaded JSON file has no usable path.")
    return path


def _clean_repo_parts(hf_username, dataset_name):
    """Return ``(username, dataset_name)`` stripped of surrounding whitespace.

    Raises:
        ValueError: If either value is missing or blank.
    """
    username = (hf_username or "").strip()
    name = (dataset_name or "").strip()
    if not username or not name:
        raise ValueError(
            "Both a Hugging Face username and a dataset name are required."
        )
    return username, name


def process_and_upload(json_file_path, hf_username, dataset_name):
    """Load a JSON file into a dataset and push it to the Hugging Face Hub.

    The JSON file is read with pandas, converted to a ``Dataset``, wrapped in
    a ``DatasetDict`` under a single ``"train"`` split, and pushed to the
    public dataset repository ``<hf_username>/<dataset_name>`` (created first
    if it does not exist yet).

    Args:
        json_file_path: The uploaded file, either as a path string or as an
            object exposing the path via ``.name``.
        hf_username: Hugging Face account name that owns the repository.
        dataset_name: Name of the dataset repository.

    Returns:
        A confirmation message naming the dataset and the user.

    Raises:
        ValueError: If the file, the username, or the dataset name is missing.
            Errors raised while reading the JSON or talking to the Hub are
            not caught here and propagate to the caller.
    """
    # Validate everything up front so bad input fails fast, before any file
    # parsing or network traffic happens.
    source_path = _resolve_json_path(json_file_path)
    username, name = _clean_repo_parts(hf_username, dataset_name)
    repo_id = f"{username}/{name}"

    # Load your combined JSON file
    df = pd.read_json(source_path)
    print(f"DataFrame shape: {df.shape}")
    print(f"DataFrame columns: {df.columns.tolist()}")

    # Create HuggingFace Dataset
    dataset = Dataset.from_pandas(df)
    print("Dataset created successfully!")

    # Create a DatasetDict with a 'train' split
    dataset_dict = DatasetDict({"train": dataset})

    # Log in to Hugging Face (or use existing token if available).
    # This stays best-effort, but only ordinary exceptions are swallowed
    # (not KeyboardInterrupt/SystemExit) and the reason is reported.
    try:
        notebook_login()
    except Exception as exc:
        # Assume user has already logged in or token is set
        print(f"Interactive Hugging Face login skipped: {exc}")

    # Push to the Hugging Face Hub
    api = HfApi()
    api.create_repo(
        repo_id=repo_id,
        repo_type="dataset",
        private=False,
        exist_ok=True,
    )
    dataset_dict.push_to_hub(repo_id, private=False)

    return f"Dataset '{name}' uploaded to Hugging Face Hub under user '{username}'"
32
+
33
# Build the Gradio interface: a file picker plus two text fields feed
# process_and_upload, and its returned message is shown in a label.
# NOTE(review): the nesting below (three inputs inside the Row, button and
# label beneath it) is reconstructed from a flattened diff view -- confirm
# against the real file.
with gr.Blocks() as demo:
    gr.Markdown("## Upload and Process JSON to Hugging Face Dataset")

    with gr.Row():
        json_file_input = gr.File(label="Select JSON file")
        hf_username_input = gr.Textbox(
            label="Hugging Face Username",
            placeholder="Your HF username",
        )
        dataset_name_input = gr.Textbox(
            label="Dataset Name",
            placeholder="Name for your dataset",
        )

    submit_button = gr.Button("Upload to Hugging Face")
    output_label = gr.Label(label="Output")

    # Clicking the button runs the upload with the three input values and
    # writes the result string into the output label.
    submit_button.click(
        fn=process_and_upload,
        inputs=[json_file_input, hf_username_input, dataset_name_input],
        outputs=output_label,
    )

# Start the web server for the app.
demo.launch()