# Hugging Face Space (status when captured: Sleeping)
| import gradio as gr | |
| from datasets import load_dataset | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
# ---------- CONFIGURATION ---------- #
# Dataset identifier handed to `load_dataset`.
DATASET_NAME = "ag_news"

# Integer class id -> human-readable category name (ids are the tuple positions).
LABEL_MAPPING = dict(
    enumerate(("World", "Sports", "Business", "Science/Technology"))
)

# For demo purposes, only use a subset
SPLIT = "train[:1000]"
# ---------- DATA LOADING ---------- #
def load_data():
    """Fetch the configured dataset split and return it as a DataFrame.

    The result of ``load_dataset(DATASET_NAME, split=SPLIT)`` is wrapped in a
    ``pandas.DataFrame``; a ``label_name`` column is then added by mapping the
    ``label`` column through ``LABEL_MAPPING``.
    """
    raw_split = load_dataset(DATASET_NAME, split=SPLIT)
    frame = pd.DataFrame(raw_split)
    frame["label_name"] = frame["label"].map(LABEL_MAPPING)
    return frame


# Loaded once at import time and shared by the app callbacks.
df = load_data()
# ---------- APP FUNCTIONALITY ---------- #
def get_data_preview(n_rows: int = 5) -> pd.DataFrame:
    """Return the top ``n_rows`` rows of the module-level ``df``.

    Args:
        n_rows: Number of rows to return. Float values are accepted and
            truncated toward zero, because the slider that feeds this
            callback may deliver a float rather than an int.

    Returns:
        The result of ``df.head`` for the truncated row count.
    """
    # DataFrame.head slices positionally and raises TypeError on a float,
    # so normalise whatever the UI sends to a plain int first.
    return df.head(int(n_rows))
def get_label_distribution_plot():
    """Return a bar chart of label distribution.

    Counts the values of ``df["label_name"]``, orders them by category name,
    and draws them as a bar chart.

    Returns:
        The matplotlib ``Figure`` holding the chart. It has already been
        released from pyplot's figure registry, so calling this repeatedly
        does not accumulate open figures.
    """
    counts = df["label_name"].value_counts().sort_index()

    fig, ax = plt.subplots()
    counts.plot(kind="bar", ax=ax, color="#4C72B0")
    ax.set_title("Label Distribution")
    ax.set_ylabel("Number of Samples")
    ax.set_xlabel("Category")
    ax.grid(axis="y", linestyle="--", alpha=0.7)

    # Lay out this specific figure instead of pyplot's implicit "current
    # figure", which is shared global state.
    fig.tight_layout()

    # Drop pyplot's reference so figures do not pile up across clicks; the
    # Figure object itself stays valid for the caller to render.
    plt.close(fig)
    return fig
# ---------- UI LAYOUT ---------- #
# NOTE(review): the source text of this block had lost its indentation and its
# emoji were mis-encoded. The heading and closing emoji were recovered from
# the surviving bytes; the two tab-label emoji and the footer "Dataset" emoji
# were unrecoverable and are chosen by meaning -- confirm against the original.
# The Row/Tab nesting below is reconstructed from the order of the calls.
with gr.Blocks(title="AG News Dataset Explorer") as demo:
    gr.Markdown("""
    # 🧠 AG News Dataset Explorer
    Explore a cleaned and labeled version of the AG News dataset using Hugging Face `datasets`.
    - View sample records
    - Understand label distribution
    - Great for data engineers & NLP practitioners
    """)

    with gr.Tab("📄 Data Preview"):
        with gr.Row():
            # step=1 keeps the slider on whole numbers: a row count must be
            # an integer, and without an explicit step fractional values
            # could be selected.
            n_slider = gr.Slider(
                1, 20, value=5, step=1, label="Number of rows to preview"
            )
            preview_btn = gr.Button("Show Preview")
        preview_table = gr.Dataframe(label="Sample Rows")
        preview_btn.click(get_data_preview, inputs=n_slider, outputs=preview_table)

    with gr.Tab("📊 Label Distribution"):
        dist_btn = gr.Button("Generate Distribution Plot")
        dist_plot = gr.Plot(label="Label Distribution")
        dist_btn.click(get_label_distribution_plot, outputs=dist_plot)

    gr.Markdown("""
    ---
    📚 **Dataset**: [AG News on Hugging Face](https://huggingface.co/datasets/ag_news)
    ✨
    """)

# ---------- LAUNCH ---------- #
demo.launch()