"""Gradio UI for previewing Hugging Face datasets and (placeholder) uploads."""

import os

import gradio as gr
import pandas as pd
from datasets import load_dataset


# --- Backend Functions ---

def preview_dataset(dataset_id, config_name, split_name, num_rows):
    """Stream the first rows of a dataset without downloading the whole thing.

    Args:
        dataset_id: Dataset identifier passed to ``load_dataset``.
        config_name: Config / subset name; blank or ``None`` means "no
            explicit config" and is passed to ``load_dataset`` as ``None``.
        split_name: Name of the split to stream (e.g. ``"train"``).
        num_rows: Maximum number of rows to fetch; coerced with ``int()``.

    Returns:
        A ``(DataFrame, status_message)`` tuple. On any failure the DataFrame
        is empty and the message describes the problem, so the UI callback
        never raises.
    """
    # Normalise the text inputs once; surrounding whitespace from the text
    # boxes must not reach load_dataset, and None is treated like "".
    dataset_id = (dataset_id or "").strip()
    if not dataset_id:
        return pd.DataFrame(), "⚠️ Please enter a Dataset ID."

    split_name = (split_name or "").strip()
    if not split_name:
        # Without a split name there is no single stream to take rows from,
        # so fail early with a readable message instead of a library error.
        return pd.DataFrame(), "⚠️ Please enter a split name (e.g. train)."

    # Blank config -> None, otherwise the *stripped* name.
    config_name = (config_name or "").strip() or None

    try:
        ds_stream = load_dataset(
            dataset_id,
            name=config_name,
            split=split_name,
            streaming=True,
        )
        # Materialise only the first N rows of the stream.
        rows = list(ds_stream.take(int(num_rows)))
    except Exception as e:  # UI boundary: report the error, don't crash.
        return pd.DataFrame(), f"❌ Error: {e}"

    df = pd.DataFrame(rows)
    # Report the number of rows actually loaded; the split may hold fewer
    # rows than were requested.
    return df, f"✅ Successfully loaded first {len(rows)} rows from {dataset_id}"


def upload_to_hub(files, repo_name, is_private):
    """Placeholder upload handler used during development.

    The token field was removed from the UI, so nothing is uploaded; the
    function only returns a status message.

    Args:
        files: Files selected in the upload widget; falsy means none chosen.
        repo_name: Target repository name, echoed back in the message.
        is_private: Currently unused; kept so the UI wiring stays unchanged.

    Returns:
        A status string for display in the UI.
    """
    if not files:
        return "⚠️ No files selected."

    # Just return a message for now since the token logic was removed.
    return f"🛠️ Dev Mode: Upload to '{repo_name}' triggered (functionality disabled)."


# --- Frontend Interface ---

def render_explorer():
    """Build the dataset-manager view.

    Must be called inside an active ``gr.Blocks`` context. The column is
    created hidden (``visible=False``), and no handler is attached to the
    back button here; both are left to the caller.

    Returns:
        A ``(explorer_view, btn_back)`` tuple: the enclosing column and the
        "Back to Home" button.
    """
    with gr.Column(visible=False) as explorer_view:
        gr.Markdown("# 🤗 Hugging Face Dataset Manager")

        with gr.Tabs():
            # TAB 1: EXPLORE
            with gr.TabItem("🔍 Explore Datasets"):
                gr.Markdown("Preview any dataset on the Hub without downloading it.")

                with gr.Row():
                    in_id = gr.Textbox(
                        label="Dataset ID",
                        placeholder="e.g. imdb, glue, common_voice",
                        value="imdb",
                    )
                    in_config = gr.Textbox(
                        label="Config / Subset",
                        placeholder="e.g. default",
                        value="",
                    )
                    in_split = gr.Textbox(label="Split", value="train")

                in_rows = gr.Slider(
                    minimum=5,
                    maximum=100,
                    value=10,
                    step=5,
                    label="Rows to Preview",
                )
                btn_preview = gr.Button("Load Preview", variant="primary")

                out_status = gr.Markdown()
                out_df = gr.Dataframe(label="Dataset Preview", wrap=True)

                # Output order matches preview_dataset's (DataFrame, message).
                btn_preview.click(
                    fn=preview_dataset,
                    inputs=[in_id, in_config, in_split, in_rows],
                    outputs=[out_df, out_status],
                )

            # TAB 2: UPLOADER
            with gr.TabItem("☁️ Upload New Dataset"):
                gr.Markdown("Upload CSV, JSON, or Parquet files to create a new dataset.")

                # --- TOKEN FIELD REMOVED ---
                with gr.Row():
                    in_repo = gr.Textbox(
                        label="New Repo Name",
                        placeholder="username/my-new-dataset",
                    )
                    in_private = gr.Checkbox(label="Private Dataset", value=True)

                in_files = gr.File(label="Drag & Drop Files", file_count="multiple")
                btn_upload = gr.Button("Push to Hub", variant="primary")
                out_upload_status = gr.Markdown()

                # Inputs exclude the token, matching upload_to_hub's signature.
                btn_upload.click(
                    fn=upload_to_hub,
                    inputs=[in_files, in_repo, in_private],
                    outputs=[out_upload_status],
                )

        gr.Markdown("---")
        btn_back = gr.Button("⬅️ Back to Home", variant="secondary")

    return explorer_view, btn_back


if __name__ == "__main__":
    with gr.Blocks() as demo:
        view, _ = render_explorer()
        # The view is built hidden; show it when this module runs standalone.
        view.visible = True
    demo.launch()