from dash import Dash, html, dcc, dash_table
import dash_bootstrap_components as dbc
import pandas as pd
import plotly.express as px
from datasets import load_dataset
# ---------------------------------------------
# Load the dataset from the Hugging Face Hub
# ---------------------------------------------
ds = load_dataset("nvidia/Nemotron-Image-Training-v3", "aokvqa_1")
df = ds["train"].to_pandas()

# Keep only the first 20 rows for the preview table so the dashboard stays light.
df_preview = df.iloc[:20].copy()

# Summary frame: one (metric, value) row per statistic.
resumo_df = pd.DataFrame(
    [
        ("Total de linhas", len(df)),
        ("Total de colunas", len(df.columns)),
    ],
    columns=["Métrica", "Valor"],
)

# Single-column frame listing every column name of the dataset.
colunas_df = pd.Series(list(df.columns), name="Colunas").to_frame()

# One-row frame feeding the bar chart with the total number of records.
grafico_df = pd.DataFrame([{"Tipo": "Registos", "Quantidade": len(df)}])
# ---------------------------------------------
# Create the Dash application
# ---------------------------------------------
app = Dash(
    __name__,
    external_stylesheets=[dbc.themes.BOOTSTRAP],
)
# Module-level handle on the app's underlying server object
# (presumably imported by a WSGI host at deploy time -- confirm).
server = app.server
# -----------------------------
# Layout
# -----------------------------
# Header style shared by both data tables.
_TABLE_HEADER_STYLE = {"fontWeight": "bold", "backgroundColor": "#f8f9fa"}


def _stat_card(title, value, value_class):
    """Return an ``md=4`` column wrapping a card with a title and a large value.

    Args:
        title: Text shown in the card's ``H5`` title.
        value: Text shown in the card's ``H2`` body.
        value_class: CSS class applied to the ``H2`` (e.g. ``"text-primary"``).
    """
    return dbc.Col(
        dbc.Card(
            dbc.CardBody([
                html.H5(title, className="card-title"),
                html.H2(value, className=value_class),
            ])
        ),
        md=4,
    )


def _table_columns(frame):
    """Return DataTable column specs (``name`` == ``id``) for each column of *frame*."""
    return [{"name": col, "id": col} for col in frame.columns]


app.layout = dbc.Container(
    [
        # Page title and dataset description.
        dbc.Row([
            dbc.Col([
                html.H1("Dashboard do Dataset Nemotron", className="text-center my-4"),
                html.P(
                    "Dataset carregado com Hugging Face datasets: nvidia/Nemotron-Image-Training-v3 / aokvqa_1",
                    className="text-center text-muted",
                ),
            ])
        ]),
        # Headline numbers: record count, column count and subset name.
        dbc.Row(
            [
                _stat_card("Total de registos", f"{len(df)}", "text-primary"),
                _stat_card("Total de colunas", f"{len(df.columns)}", "text-success"),
                _stat_card("Subset", "aokvqa_1", "text-danger"),
            ],
            className="mb-4",
        ),
        # Bar chart of the record count next to the list of column names.
        dbc.Row(
            [
                dbc.Col(
                    [
                        dcc.Graph(
                            figure=px.bar(
                                grafico_df,
                                x="Tipo",
                                y="Quantidade",
                                title="Quantidade de registos no dataset",
                            )
                        )
                    ],
                    md=6,
                ),
                dbc.Col(
                    [
                        # The original literal was mojibake ("disponÃveis"); the
                        # intended word is "disponíveis".
                        html.H4("Colunas disponíveis"),
                        dash_table.DataTable(
                            columns=_table_columns(colunas_df),
                            data=colunas_df.to_dict("records"),
                            style_table={"overflowX": "auto", "height": "350px", "overflowY": "auto"},
                            style_cell={"textAlign": "left", "padding": "8px"},
                            style_header=_TABLE_HEADER_STYLE,
                            page_size=10,
                        ),
                    ],
                    md=6,
                ),
            ],
            className="mb-4",
        ),
        # Preview of the first rows of the dataset.
        dbc.Row([
            dbc.Col([
                html.H4("Primeiras 20 linhas"),
                dash_table.DataTable(
                    columns=_table_columns(df_preview),
                    # Every cell is converted to a string before serialisation
                    # (presumably so non-scalar values can be displayed -- confirm).
                    data=df_preview.astype(str).to_dict("records"),
                    style_table={"overflowX": "auto"},
                    style_cell={
                        "textAlign": "left",
                        "padding": "8px",
                        "maxWidth": "250px",
                        "whiteSpace": "normal",
                    },
                    style_header=_TABLE_HEADER_STYLE,
                    page_size=20,
                ),
            ])
        ]),
    ],
    fluid=True,
)
# -----------------------------
# Run app
# -----------------------------
if __name__ == "__main__":
    # Start the server only when the file is executed as a script, not when
    # it is imported. Listens on all interfaces (0.0.0.0), port 7860, with
    # debug mode disabled.
    app.run(host="0.0.0.0", port=7860, debug=False)