"""Dash dashboard summarising one subset of a Hugging Face dataset.

At import time the module downloads the configured subset, converts its
``train`` split to a pandas DataFrame, and builds a static layout with:

* three summary cards (row count, column count, subset name),
* a bar chart with the row count,
* a table listing the column names,
* a table previewing the first ``PREVIEW_ROWS`` rows.

``app`` is the Dash application and ``server`` is its ``app.server``
attribute, exposed at module level.
"""

from dash import Dash, html, dcc, dash_table
import dash_bootstrap_components as dbc
import pandas as pd
import plotly.express as px
from datasets import load_dataset

# -----------------------------
# Configuration
# -----------------------------
# Single source of truth for values that are both used to load the data and
# shown in the UI, so the labels cannot drift from what was actually loaded.
DATASET_REPO = "nvidia/Nemotron-Image-Training-v3"
DATASET_SUBSET = "aokvqa_1"
PREVIEW_ROWS = 20

_CELL_PADDING = "8px"
_HEADER_STYLE = {"fontWeight": "bold", "backgroundColor": "#f8f9fa"}


def _metric_card(title, value, value_class):
    """Return a 4/12-wide column holding a card with a title and a big value."""
    return dbc.Col(
        dbc.Card(
            dbc.CardBody([
                html.H5(title, className="card-title"),
                html.H2(value, className=value_class),
            ])
        ),
        md=4,
    )


def _table_columns(frame):
    """Return the DataTable ``columns`` spec for every column of *frame*."""
    return [{"name": name, "id": name} for name in frame.columns]


def _header_style():
    """Return a fresh copy of the shared table-header style."""
    return dict(_HEADER_STYLE)


# -----------------------------
# Load the dataset from Hugging Face
# -----------------------------
ds = load_dataset(DATASET_REPO, DATASET_SUBSET)
df = ds["train"].to_pandas()

total_rows = len(df)
total_columns = len(df.columns)

# Limit the preview to avoid a heavy dashboard.
df_preview = df.head(PREVIEW_ROWS).copy()

# Summary dataframe (not referenced by the layout below; kept as a
# module-level name in case other code imports it).
resumo_df = pd.DataFrame({
    "Métrica": [
        "Total de linhas",
        "Total de colunas",
    ],
    "Valor": [
        total_rows,
        total_columns,
    ],
})

colunas_df = pd.DataFrame({
    "Colunas": list(df.columns),
})

grafico_df = pd.DataFrame({
    "Tipo": ["Registos"],
    "Quantidade": [total_rows],
})

# -----------------------------
# Create the app
# -----------------------------
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
# Module-level alias of the app's server object — presumably so a WSGI
# runner can import it; confirm against the deployment setup.
server = app.server

# -----------------------------
# Layout
# -----------------------------
title_row = dbc.Row([
    dbc.Col([
        html.H1("Dashboard do Dataset Nemotron", className="text-center my-4"),
        html.P(
            f"Dataset carregado com Hugging Face datasets: "
            f"{DATASET_REPO} / {DATASET_SUBSET}",
            className="text-center text-muted",
        ),
    ])
])

cards_row = dbc.Row(
    [
        _metric_card("Total de registos", str(total_rows), "text-primary"),
        _metric_card("Total de colunas", str(total_columns), "text-success"),
        _metric_card("Subset", DATASET_SUBSET, "text-danger"),
    ],
    className="mb-4",
)

chart_and_columns_row = dbc.Row(
    [
        dbc.Col(
            [
                dcc.Graph(
                    figure=px.bar(
                        grafico_df,
                        x="Tipo",
                        y="Quantidade",
                        title="Quantidade de registos no dataset",
                    )
                )
            ],
            md=6,
        ),
        dbc.Col(
            [
                html.H4("Colunas disponíveis"),
                dash_table.DataTable(
                    columns=_table_columns(colunas_df),
                    data=colunas_df.to_dict("records"),
                    style_table={
                        "overflowX": "auto",
                        "height": "350px",
                        "overflowY": "auto",
                    },
                    style_cell={"textAlign": "left", "padding": _CELL_PADDING},
                    style_header=_header_style(),
                    page_size=10,
                ),
            ],
            md=6,
        ),
    ],
    className="mb-4",
)

preview_row = dbc.Row([
    dbc.Col([
        html.H4(f"Primeiras {PREVIEW_ROWS} linhas"),
        dash_table.DataTable(
            columns=_table_columns(df_preview),
            # Every cell is converted to its string form before being handed
            # to the table — presumably because some columns hold values the
            # table cannot display directly; confirm against the dataset schema.
            data=df_preview.astype(str).to_dict("records"),
            style_table={"overflowX": "auto"},
            style_cell={
                "textAlign": "left",
                "padding": _CELL_PADDING,
                "maxWidth": "250px",
                "whiteSpace": "normal",
            },
            style_header=_header_style(),
            # One page holds the whole preview.
            page_size=PREVIEW_ROWS,
        ),
    ])
])

app.layout = dbc.Container(
    [title_row, cards_row, chart_and_columns_row, preview_row],
    fluid=True,
)

# -----------------------------
# Run the app
# -----------------------------
if __name__ == "__main__":
    # Listen on all interfaces, port 7860, with the debugger disabled.
    app.run(host="0.0.0.0", port=7860, debug=False)