# Hugging Face Spaces page header (scrape residue) -- Space status: Running
| import gradio as gr | |
| import pandas as pd | |
| from datasets import load_dataset | |
| import os | |
# --- Backend Functions ---
def preview_dataset(dataset_id, config_name, split_name, num_rows):
    """
    Stream the first ``num_rows`` rows of a dataset into a DataFrame.

    The dataset is opened with ``streaming=True`` so only the requested rows
    are pulled instead of downloading the whole dataset.

    Args:
        dataset_id: Dataset identifier handed to ``load_dataset``. ``None``,
            an empty string, or a whitespace-only string short-circuits with
            a warning and no load attempt.
        config_name: Optional config / subset name. ``None`` or a blank
            string means "no explicit config" (``name=None``).
        split_name: Split to stream. Surrounding whitespace is removed when
            it is a string.
        num_rows: Maximum number of rows to fetch; converted with ``int()``.

    Returns:
        A ``(DataFrame, status_message)`` tuple. On any failure the DataFrame
        is empty and the message carries the error text.
    """
    # Normalise first so that "   " and None are rejected like "".
    dataset_id = str(dataset_id or "").strip()
    if not dataset_id:
        return pd.DataFrame(), "โ ๏ธ Please enter a Dataset ID."
    try:
        # A missing or blank config collapses to None; a real one is passed
        # without stray whitespace from the text box.
        config_name = str(config_name or "").strip() or None
        if isinstance(split_name, str):
            split_name = split_name.strip()
        # Stream the dataset
        ds_stream = load_dataset(
            dataset_id,
            name=config_name,
            split=split_name,
            streaming=True,
        )
        # Get first N rows
        data_head = list(ds_stream.take(int(num_rows)))
        df = pd.DataFrame(data_head)
        # Report what was actually fetched: the stream may hold fewer rows
        # than requested, and num_rows may arrive as a float from the UI.
        return df, f"โ Successfully loaded first {len(data_head)} rows from {dataset_id}"
    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing.
        return pd.DataFrame(), f"โ Error: {str(e)}"
def upload_to_hub(files, repo_name, is_private):
    """
    Development stub for the "Push to Hub" action.

    No upload is performed: the token input was removed, so this only
    reports what would have been triggered.

    Args:
        files: Selected files; a falsy value (``None`` / empty list) means
            nothing was chosen.
        repo_name: Target repository name, echoed back in the message.
        is_private: Accepted for interface compatibility; not used.

    Returns:
        A status message string.
    """
    if files:
        # Token logic is gone, so only acknowledge the request.
        return f"๐ ๏ธ Dev Mode: Upload to '{repo_name}' triggered (functionality disabled)."
    return "โ ๏ธ No files selected."
# --- Frontend Interface ---
def render_explorer():
    """
    Build the dataset-manager view.

    Creates an initially hidden ``gr.Column`` holding two tabs (dataset
    preview and upload) followed by a separator and a "back" button, and
    wires the two action buttons to their backend functions.

    Returns:
        A ``(column, back_button)`` tuple: the hidden column and the back
        button, so the caller can toggle visibility and attach navigation.
    """
    # NOTE(review): nesting of the Row / Tab contents was reconstructed from
    # a flattened source; component creation order is unchanged -- confirm
    # the layout against the running app.
    with gr.Column(visible=False) as explorer_view:
        gr.Markdown("# ๐ค Hugging Face Dataset Manager")

        with gr.Tabs():
            # ---- Tab 1: preview a dataset ----
            with gr.TabItem("๐ Explore Datasets"):
                gr.Markdown("Preview any dataset on the Hub without downloading it.")

                with gr.Row():
                    dataset_box = gr.Textbox(
                        label="Dataset ID",
                        placeholder="e.g. imdb, glue, common_voice",
                        value="imdb",
                    )
                    config_box = gr.Textbox(
                        label="Config / Subset",
                        placeholder="e.g. default",
                        value="",
                    )
                    split_box = gr.Textbox(label="Split", value="train")

                row_slider = gr.Slider(
                    minimum=5,
                    maximum=100,
                    value=10,
                    step=5,
                    label="Rows to Preview",
                )
                preview_button = gr.Button("Load Preview", variant="primary")
                preview_status = gr.Markdown()
                preview_table = gr.Dataframe(label="Dataset Preview", wrap=True)

                # preview_dataset returns (DataFrame, status) in this order.
                preview_button.click(
                    fn=preview_dataset,
                    inputs=[dataset_box, config_box, split_box, row_slider],
                    outputs=[preview_table, preview_status],
                )

            # ---- Tab 2: upload files as a new dataset ----
            with gr.TabItem("โ๏ธ Upload New Dataset"):
                gr.Markdown("Upload CSV, JSON, or Parquet files to create a new dataset.")

                # The token field was removed from this form.
                with gr.Row():
                    repo_box = gr.Textbox(
                        label="New Repo Name",
                        placeholder="username/my-new-dataset",
                    )
                    private_checkbox = gr.Checkbox(label="Private Dataset", value=True)

                file_picker = gr.File(label="Drag & Drop Files", file_count="multiple")
                upload_button = gr.Button("Push to Hub", variant="primary")
                upload_status = gr.Markdown()

                # Inputs no longer include a token.
                upload_button.click(
                    fn=upload_to_hub,
                    inputs=[file_picker, repo_box, private_checkbox],
                    outputs=[upload_status],
                )

        gr.Markdown("---")
        btn_back = gr.Button("โฌ ๏ธ Back to Home", variant="secondary")

    return explorer_view, btn_back
if __name__ == "__main__":
    # Standalone run: build the explorer on its own and show it right away,
    # since render_explorer() creates the column hidden.
    with gr.Blocks() as demo:
        explorer_column = render_explorer()[0]
        explorer_column.visible = True
    demo.launch()