Spaces:
Running
Running
| from dash import Dash, html, dcc, dash_table | |
| import dash_bootstrap_components as dbc | |
| import pandas as pd | |
| import plotly.express as px | |
| from datasets import load_dataset | |
# -----------------------------
# Load the dataset from Hugging Face
# -----------------------------
ds = load_dataset(
    "nvidia/Nemotron-Image-Training-v3",
    "aokvqa_1",
)
# Convert the "train" split to a DataFrame; the counts below are taken from it.
df = ds["train"].to_pandas()

# Keep only the first 20 rows for the preview table so the dashboard stays light.
df_preview = df.iloc[:20].copy()

# Summary frame: one row per metric (row count and column count of the full frame).
resumo_df = pd.DataFrame(
    [
        ("Total de linhas", len(df)),
        ("Total de colunas", len(df.columns)),
    ],
    columns=["Métrica", "Valor"],
)

# Single-column frame listing the dataset's column names.
colunas_df = pd.DataFrame({"Colunas": df.columns.tolist()})

# One-row frame backing the bar chart: a single category with the row count.
grafico_df = pd.DataFrame(
    [{"Tipo": "Registos", "Quantidade": len(df)}],
)
# -----------------------------
# Create the app
# -----------------------------
app = Dash(
    __name__,
    external_stylesheets=[dbc.themes.BOOTSTRAP],
)

# Module-level alias for the app's underlying server object.
# NOTE(review): presumably exposed so a WSGI host can import `server` — confirm
# against the deployment configuration.
server = app.server
# -----------------------------
# Layout
# -----------------------------
def _stat_card(title, value, value_class):
    """Return an md=4 column wrapping a card with a small title and a large value."""
    card_body = dbc.CardBody([
        html.H5(title, className="card-title"),
        html.H2(value, className=value_class),
    ])
    return dbc.Col(dbc.Card(card_body), md=4)


def _table_columns(frame):
    """Return DataTable column specs (name == id) for every column of *frame*."""
    return [{"name": col, "id": col} for col in frame.columns]


# Page heading and a subtitle naming the dataset/subset that was loaded.
_title_row = dbc.Row([
    dbc.Col([
        html.H1("Dashboard do Dataset Nemotron", className="text-center my-4"),
        html.P(
            "Dataset carregado com Hugging Face datasets: nvidia/Nemotron-Image-Training-v3 / aokvqa_1",
            className="text-center text-muted",
        ),
    ])
])

# Three headline cards: row count, column count, and the subset name.
_cards_row = dbc.Row(
    [
        _stat_card("Total de registos", str(len(df)), "text-primary"),
        _stat_card("Total de colunas", str(len(df.columns)), "text-success"),
        _stat_card("Subset", "aokvqa_1", "text-danger"),
    ],
    className="mb-4",
)

# Bar chart of the record count, drawn from grafico_df.
_records_figure = px.bar(
    grafico_df,
    x="Tipo",
    y="Quantidade",
    title="Quantidade de registos no dataset",
)

# Scrollable, paginated table listing the dataset's column names.
_columns_table = dash_table.DataTable(
    columns=_table_columns(colunas_df),
    data=colunas_df.to_dict("records"),
    style_table={"overflowX": "auto", "height": "350px", "overflowY": "auto"},
    style_cell={"textAlign": "left", "padding": "8px"},
    style_header={"fontWeight": "bold", "backgroundColor": "#f8f9fa"},
    page_size=10,
)

# Chart on the left half, column list on the right half.
_overview_row = dbc.Row(
    [
        dbc.Col([dcc.Graph(figure=_records_figure)], md=6),
        dbc.Col([html.H4("Colunas disponíveis"), _columns_table], md=6),
    ],
    className="mb-4",
)

# Preview table: every cell is converted to str before being handed to the
# DataTable, and long values wrap inside a capped cell width.
_preview_table = dash_table.DataTable(
    columns=_table_columns(df_preview),
    data=df_preview.astype(str).to_dict("records"),
    style_table={"overflowX": "auto"},
    style_cell={
        "textAlign": "left",
        "padding": "8px",
        "maxWidth": "250px",
        "whiteSpace": "normal",
    },
    style_header={
        "fontWeight": "bold",
        "backgroundColor": "#f8f9fa",
    },
    page_size=20,
)

_preview_row = dbc.Row([
    dbc.Col([html.H4("Primeiras 20 linhas"), _preview_table])
])

# Assemble the page top to bottom inside a fluid container.
app.layout = dbc.Container(
    [_title_row, _cards_row, _overview_row, _preview_row],
    fluid=True,
)
# -----------------------------
# Run app
# -----------------------------
def main():
    """Start the Dash server on all interfaces, port 7860, with debug off."""
    # NOTE(review): port 7860 is presumably what the hosting environment
    # expects — confirm before changing it.
    app.run(host="0.0.0.0", port=7860, debug=False)


if __name__ == "__main__":
    main()