# Hugging Face Space source file (file size: 3,970 bytes).
from dash import Dash, html, dcc, dash_table
import dash_bootstrap_components as dbc
import pandas as pd
import plotly.express as px
from datasets import load_dataset
# -----------------------------
# Load the dataset from Hugging Face
# -----------------------------
ds = load_dataset("nvidia/Nemotron-Image-Training-v3", "aokvqa_1")
df = ds["train"].to_pandas()

# Row and column counts of the full frame, reused by the summary frames below.
_n_rows, _n_cols = df.shape

# Keep only the first 20 rows for display so the dashboard stays light.
df_preview = df.head(20).copy()

# Summary frame: one (metric, value) record per row.
resumo_df = pd.DataFrame(
    [
        ("Total de linhas", _n_rows),
        ("Total de colunas", _n_cols),
    ],
    columns=["Métrica", "Valor"],
)

# Single-column frame listing the dataset's column names.
colunas_df = pd.DataFrame({"Colunas": df.columns.tolist()})

# One-row frame that feeds the bar chart.
grafico_df = pd.DataFrame([{"Tipo": "Registos", "Quantidade": _n_rows}])
# -----------------------------
# Create the app
# -----------------------------
app = Dash(
    __name__,
    external_stylesheets=[dbc.themes.BOOTSTRAP],
)

# Re-export the app's `server` attribute under a module-level name.
server = app.server
# -----------------------------
# Layout
# -----------------------------
def _table_columns(frame):
    """Return the DataTable column specs, one per column of *frame*."""
    return [{"name": col, "id": col} for col in frame.columns]


def _stat_card(title, value, value_class):
    """Return an ``md=4`` column wrapping a card with a title and a large value."""
    card_body = dbc.CardBody([
        html.H5(title, className="card-title"),
        html.H2(value, className=value_class),
    ])
    return dbc.Col(dbc.Card(card_body), md=4)


# Page title and a subtitle naming the dataset that was loaded.
_title_row = dbc.Row([
    dbc.Col([
        html.H1("Dashboard do Dataset Nemotron", className="text-center my-4"),
        html.P(
            "Dataset carregado com Hugging Face datasets: nvidia/Nemotron-Image-Training-v3 / aokvqa_1",
            className="text-center text-muted",
        ),
    ])
])

# Three summary cards: record count, column count and subset name.
_cards_row = dbc.Row(
    [
        _stat_card("Total de registos", str(len(df)), "text-primary"),
        _stat_card("Total de colunas", str(len(df.columns)), "text-success"),
        _stat_card("Subset", "aokvqa_1", "text-danger"),
    ],
    className="mb-4",
)

# Left half: bar chart of the record count.
_records_figure = px.bar(
    grafico_df,
    x="Tipo",
    y="Quantidade",
    title="Quantidade de registos no dataset",
)
_chart_col = dbc.Col([dcc.Graph(figure=_records_figure)], md=6)

# Right half: scrollable table listing the column names.
_columns_table = dash_table.DataTable(
    columns=_table_columns(colunas_df),
    data=colunas_df.to_dict("records"),
    style_table={"overflowX": "auto", "height": "350px", "overflowY": "auto"},
    style_cell={"textAlign": "left", "padding": "8px"},
    style_header={"fontWeight": "bold", "backgroundColor": "#f8f9fa"},
    page_size=10,
)
_columns_col = dbc.Col([html.H4("Colunas disponíveis"), _columns_table], md=6)

_overview_row = dbc.Row([_chart_col, _columns_col], className="mb-4")

# Preview table; every cell is converted to a string before being handed
# to the DataTable.
_preview_records = df_preview.astype(str).to_dict("records")
_preview_table = dash_table.DataTable(
    columns=_table_columns(df_preview),
    data=_preview_records,
    style_table={"overflowX": "auto"},
    style_cell={
        "textAlign": "left",
        "padding": "8px",
        "maxWidth": "250px",
        "whiteSpace": "normal",
    },
    style_header={
        "fontWeight": "bold",
        "backgroundColor": "#f8f9fa",
    },
    page_size=20,
)
_preview_row = dbc.Row([
    dbc.Col([html.H4("Primeiras 20 linhas"), _preview_table])
])

app.layout = dbc.Container(
    [_title_row, _cards_row, _overview_row, _preview_row],
    fluid=True,
)
# -----------------------------
# Run the app
# -----------------------------
# Start the server only when this file is executed as a script; importing
# the module leaves `app` and `server` available without starting anything.
if __name__ == "__main__":
    # Listen on all interfaces on port 7860, with debug mode off.
    app.run(host="0.0.0.0", port=7860, debug=False)