from __future__ import annotations

import gradio as gr

from core.app_state import APP_STATE
from core.events import Event, EventType
from core.tab_feedback import emit_tab_error, status_ok
from datasets.loader import (
    dataset_statistics,
    preview_huggingface_dataset,
    preview_local_dataset,
)
from ui.progress import CLICK_PROGRESS


def build_dataset_tab() -> None:
    """Create the dataset tab's components and hook up its three buttons.

    The tab consists of two textboxes (dataset ID/path and split), a preview
    table, a JSON statistics view, a status line and three buttons: local
    preview, Hugging Face preview and local statistics. Every handler returns
    a pair whose second item is the markdown shown in the status line.

    NOTE(review): presumably invoked inside an active ``gr.Blocks`` context so
    the components attach to the page -- confirm with the caller.
    """
    # Components are instantiated in the same order as before; only the local
    # variable names differ.
    source_box = gr.Textbox(
        label="Dataset ID or local path",
        placeholder="Example: tatsu-lab/alpaca",
    )
    split_box = gr.Textbox(label="Split", value="train")
    preview_table = gr.Dataframe(headers=["field", "value"], label="Preview")
    stats_view = gr.JSON(label="Dataset statistics")
    status_line = gr.Markdown(status_ok("Ready."))
    local_preview_btn = gr.Button("Preview dataset", variant="primary")
    hf_preview_btn = gr.Button("Preview Hugging Face dataset")
    stats_btn = gr.Button("Calculate local stats")

    def preview_dataset(ds_id: str, split_name: str) -> tuple[list[list[str]], str]:
        """Preview a local dataset and publish a ``DATASET_LOADED`` event.

        Args:
            ds_id: Text from the dataset textbox, passed to
                ``preview_local_dataset``.
            split_name: Text from the split textbox; it is only recorded in
                the event payload and in the error context.

        Returns:
            ``(table rows, status markdown)``. On an empty ``ds_id`` or a
            caught loader error the table holds one ``status``/``error`` row.
        """
        if not ds_id:
            prompt = "Enter a local CSV or JSONL path."
            return [["status", prompt]], emit_tab_error("Dataset", prompt)

        try:
            loaded = preview_local_dataset(ds_id)
            payload = {
                "source": loaded.source,
                "rows": loaded.rows,
                "columns": loaded.columns,
                "split": split_name,
            }
            # The event emission and table rendering stay inside the ``try``,
            # so the listed exception types raised by them are reported the
            # same way as loader failures.
            APP_STATE.emit(Event(EventType.DATASET_LOADED, payload))
            outcome = loaded.as_table(), status_ok("Local dataset preview loaded.")
        except (FileNotFoundError, ValueError, OSError) as err:
            reason = str(err)
            outcome = [["error", reason]], emit_tab_error(
                "Dataset",
                reason,
                {"source": ds_id, "split": split_name},
            )
        return outcome

    def preview_hf_dataset(ds_id: str, split_name: str) -> tuple[list[list[str]], str]:
        """Preview one split of a Hugging Face dataset.

        Unlike ``preview_dataset`` this handler does not emit a
        ``DATASET_LOADED`` event.

        Args:
            ds_id: Text from the dataset textbox, passed to
                ``preview_huggingface_dataset``.
            split_name: Text from the split textbox, passed alongside
                ``ds_id``.

        Returns:
            ``(table rows, status markdown)``. On an empty ``ds_id`` or a
            caught loader error the table holds one ``status``/``error`` row.
        """
        if not ds_id:
            prompt = "Enter a Hugging Face dataset ID."
            return [["status", prompt]], emit_tab_error("Dataset", prompt)

        try:
            fetched = preview_huggingface_dataset(ds_id, split_name)
        except (ImportError, RuntimeError, ValueError, OSError) as err:
            reason = str(err)
            return [["error", reason]], emit_tab_error(
                "Dataset",
                reason,
                {"source": ds_id, "split": split_name},
            )
        else:
            # Rendering happens outside the guarded call, so errors raised
            # here are not converted into a tab error.
            return fetched.as_table(), status_ok("Hugging Face dataset preview loaded.")

    def calculate_stats(ds_id: str) -> tuple[dict, str]:
        """Compute statistics for a local dataset.

        Args:
            ds_id: Text from the dataset textbox, passed to
                ``dataset_statistics``.

        Returns:
            ``(stats dict, status markdown)``. On an empty ``ds_id`` or a
            caught error the dict holds a single ``status``/``error`` entry.
        """
        if not ds_id:
            prompt = "Enter a local CSV or JSONL path."
            return {"status": prompt}, emit_tab_error("Dataset", prompt)

        try:
            summary = dataset_statistics(ds_id).as_dict()
            outcome = summary, status_ok("Local dataset stats calculated.")
        except (FileNotFoundError, ValueError, OSError) as err:
            reason = str(err)
            outcome = {"error": reason}, emit_tab_error(
                "Dataset",
                reason,
                {"source": ds_id},
            )
        return outcome

    # Listeners are registered in the same order as before: local preview,
    # Hugging Face preview, then statistics.
    local_preview_btn.click(
        fn=preview_dataset,
        inputs=[source_box, split_box],
        outputs=[preview_table, status_line],
        show_progress=CLICK_PROGRESS,
    )
    hf_preview_btn.click(
        fn=preview_hf_dataset,
        inputs=[source_box, split_box],
        outputs=[preview_table, status_line],
        show_progress=CLICK_PROGRESS,
    )
    stats_btn.click(
        fn=calculate_stats,
        inputs=source_box,
        outputs=[stats_view, status_line],
        show_progress=CLICK_PROGRESS,
    )