# BYO-community-v2 / data_explorer.py
# sefif's picture
# Update data_explorer.py
# aba547a verified
import gradio as gr
import pandas as pd
from datasets import load_dataset
import os
# --- Backend Functions ---
def preview_dataset(dataset_id, config_name, split_name, num_rows):
    """
    Stream the first ``num_rows`` rows of a dataset into a DataFrame.

    The dataset is opened with ``streaming=True`` so only the requested rows
    are read instead of downloading the whole thing.

    Args:
        dataset_id: Dataset identifier passed to ``load_dataset``.
        config_name: Config / subset name. Blank or ``None`` means no
            explicit config is passed (``name=None``).
        split_name: Name of the split to read, e.g. ``"train"``.
        num_rows: Number of rows to preview; coerced with ``int``.

    Returns:
        A ``(DataFrame, status_message)`` tuple. On invalid input or any
        loading error the DataFrame is empty and the message explains why;
        this function does not raise.
    """
    # Text inputs may arrive as None or whitespace-only; normalise before
    # validating so "   " is treated the same as an empty field.
    dataset_id = (dataset_id or "").strip()
    if not dataset_id:
        return pd.DataFrame(), "⚠️ Please enter a Dataset ID."

    split_name = (split_name or "").strip()
    if not split_name:
        # A single split is required: `.take()` below is called on the
        # object returned for one split.
        return pd.DataFrame(), "⚠️ Please enter a Split name."

    # If config is empty, pass None so the library picks its default.
    config_name = (config_name or "").strip() or None

    try:
        row_count = int(num_rows)
        # Stream the dataset
        ds_stream = load_dataset(
            dataset_id,
            name=config_name,
            split=split_name,
            streaming=True,
        )
        # Materialise only the first N rows
        rows = list(ds_stream.take(row_count))
        df = pd.DataFrame(rows)
    except Exception as e:
        # UI boundary: report the failure as a status message rather than
        # letting the exception propagate to the caller.
        return pd.DataFrame(), f"❌ Error: {e}"

    # Report the number of rows actually read; the dataset may hold fewer
    # than requested.
    return df, f"✅ Successfully loaded first {len(rows)} rows from {dataset_id}"
def upload_to_hub(files, repo_name, is_private):
    """
    Placeholder upload handler used during development.

    The token field was removed from the UI, so no upload is performed;
    the function only validates its inputs and returns a status message.

    Args:
        files: Selected files; ``None`` or an empty list means nothing
            was selected.
        repo_name: Target repository name, e.g. ``"username/my-dataset"``.
        is_private: Requested visibility. Currently unused because the
            upload itself is disabled.

    Returns:
        A status string to display to the user.
    """
    if not files:
        return "⚠️ No files selected."

    # Reject a blank or missing repo name instead of reporting an upload
    # to '' or 'None'.
    repo_name = (repo_name or "").strip()
    if not repo_name:
        return "⚠️ Please enter a repo name."

    # Just return a message for now since the token logic was removed
    return f"🛠️ Dev Mode: Upload to '{repo_name}' triggered (functionality disabled)."
# --- Frontend Interface ---
def render_explorer():
    """
    Build the dataset explorer view and wire up its event handlers.

    Must be called inside an active ``gr.Blocks`` context. The top-level
    column is created with ``visible=False``; the caller decides when to
    show it.

    Returns:
        A ``(explorer_view, btn_back)`` tuple: the layout column and the
        "Back to Home" button. No click handler is attached to the button
        here.
    """
    with gr.Column(visible=False) as explorer_view:
        gr.Markdown("# 🤗 Hugging Face Dataset Manager")

        with gr.Tabs():
            # TAB 1: EXPLORE
            with gr.TabItem("🔍 Explore Datasets"):
                gr.Markdown("Preview any dataset on the Hub without downloading it.")

                with gr.Row():
                    in_id = gr.Textbox(
                        label="Dataset ID",
                        placeholder="e.g. imdb, glue, common_voice",
                        value="imdb",
                    )
                    in_config = gr.Textbox(
                        label="Config / Subset",
                        placeholder="e.g. default",
                        value="",
                    )
                    in_split = gr.Textbox(label="Split", value="train")

                # NOTE(review): original indentation was lost; the slider is
                # assumed to sit below the row rather than inside it - confirm.
                in_rows = gr.Slider(
                    minimum=5, maximum=100, value=10, step=5, label="Rows to Preview"
                )
                btn_preview = gr.Button("Load Preview", variant="primary")
                out_status = gr.Markdown()
                out_df = gr.Dataframe(label="Dataset Preview", wrap=True)

                # preview_dataset returns (DataFrame, status), so outputs
                # are listed in that order.
                btn_preview.click(
                    fn=preview_dataset,
                    inputs=[in_id, in_config, in_split, in_rows],
                    outputs=[out_df, out_status],
                )

            # TAB 2: UPLOADER
            with gr.TabItem("☁️ Upload New Dataset"):
                gr.Markdown("Upload CSV, JSON, or Parquet files to create a new dataset.")

                # --- TOKEN FIELD REMOVED ---
                with gr.Row():
                    in_repo = gr.Textbox(
                        label="New Repo Name",
                        placeholder="username/my-new-dataset",
                    )
                    in_private = gr.Checkbox(label="Private Dataset", value=True)

                in_files = gr.File(label="Drag & Drop Files", file_count="multiple")
                btn_upload = gr.Button("Push to Hub", variant="primary")
                out_upload_status = gr.Markdown()

                # Inputs exclude the token; order matches
                # upload_to_hub(files, repo_name, is_private).
                btn_upload.click(
                    fn=upload_to_hub,
                    inputs=[in_files, in_repo, in_private],
                    outputs=[out_upload_status],
                )

        gr.Markdown("---")
        btn_back = gr.Button("⬅️ Back to Home", variant="secondary")

    return explorer_view, btn_back
if __name__ == "__main__":
    # Standalone entry point: host the explorer view in its own Blocks app.
    with gr.Blocks() as demo:
        explorer_column, _back_button = render_explorer()
        # render_explorer() creates the column hidden; reveal it, since no
        # other code here toggles its visibility.
        explorer_column.visible = True

    demo.launch()