File size: 3,970 Bytes
f66a6b2
dfdb553
 
 
f66a6b2
dfdb553
88ebfce
f66a6b2
88ebfce
f66a6b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dfdb553
 
88ebfce
 
 
dfdb553
88ebfce
dfdb553
88ebfce
 
 
dfdb553
88ebfce
 
f66a6b2
88ebfce
f66a6b2
88ebfce
 
 
 
dfdb553
 
88ebfce
 
 
f66a6b2
 
88ebfce
 
 
 
 
 
 
f66a6b2
 
88ebfce
 
 
 
 
 
 
f66a6b2
 
88ebfce
 
 
 
dfdb553
 
 
 
f66a6b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dfdb553
f66a6b2
dfdb553
f66a6b2
dfdb553
f66a6b2
 
dfdb553
88ebfce
f66a6b2
 
 
 
88ebfce
 
f66a6b2
 
88ebfce
f66a6b2
dfdb553
f66a6b2
dfdb553
 
 
88ebfce
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
from dash import Dash, html, dcc, dash_table
import dash_bootstrap_components as dbc
import pandas as pd
import plotly.express as px
from datasets import load_dataset

# -----------------------------
# Load the dataset from Hugging Face
# -----------------------------
ds = load_dataset("nvidia/Nemotron-Image-Training-v3", "aokvqa_1")
df = ds["train"].to_pandas()

# Keep only the first 20 rows for the preview so the dashboard stays light.
df_preview = df.iloc[:20].copy()

# Summary frame: one record per metric (row count, column count).
resumo_df = pd.DataFrame([
    {"Métrica": "Total de linhas", "Valor": df.shape[0]},
    {"Métrica": "Total de colunas", "Valor": df.shape[1]},
])

# One row per column name of the full frame.
colunas_df = pd.DataFrame({"Colunas": df.columns.tolist()})

# Single-record frame holding the total row count, used as chart input.
grafico_df = pd.DataFrame([
    {"Tipo": "Registos", "Quantidade": df.shape[0]},
])

# -----------------------------
# Create the app
# -----------------------------
app = Dash(
    __name__,
    external_stylesheets=[dbc.themes.BOOTSTRAP],
)
# Module-level handle to app.server
# NOTE(review): presumably exposed so a WSGI host can import it — confirm
# against the deployment setup.
server = app.server

# -----------------------------
# Layout
# -----------------------------
def _stat_card(title, value, value_class):
    """Return an md=4 column wrapping a card with a title and a styled value.

    ``title`` is rendered as the H5 card title, ``value`` as the H2 text, and
    ``value_class`` is the CSS class applied to that H2.
    """
    card_body = dbc.CardBody([
        html.H5(title, className="card-title"),
        html.H2(value, className=value_class),
    ])
    return dbc.Col(dbc.Card(card_body), md=4)


# Page title and a short description of the data source.
title_row = dbc.Row([
    dbc.Col([
        html.H1("Dashboard do Dataset Nemotron", className="text-center my-4"),
        html.P(
            "Dataset carregado com Hugging Face datasets: nvidia/Nemotron-Image-Training-v3 / aokvqa_1",
            className="text-center text-muted",
        ),
    ])
])

# Three headline cards: row count, column count, subset name.
stats_row = dbc.Row(
    [
        _stat_card("Total de registos", f"{len(df)}", "text-primary"),
        _stat_card("Total de colunas", f"{len(df.columns)}", "text-success"),
        _stat_card("Subset", "aokvqa_1", "text-danger"),
    ],
    className="mb-4",
)

# Bar chart built from the single-row count frame.
records_figure = px.bar(
    grafico_df,
    x="Tipo",
    y="Quantidade",
    title="Quantidade de registos no dataset",
)

# Scrollable table listing the column names.
columns_table = dash_table.DataTable(
    columns=[{"name": col, "id": col} for col in colunas_df.columns],
    data=colunas_df.to_dict("records"),
    style_table={"overflowX": "auto", "height": "350px", "overflowY": "auto"},
    style_cell={"textAlign": "left", "padding": "8px"},
    style_header={"fontWeight": "bold", "backgroundColor": "#f8f9fa"},
    page_size=10,
)

# Preview table; every cell is converted to str before being handed to the table.
preview_table = dash_table.DataTable(
    columns=[{"name": col, "id": col} for col in df_preview.columns],
    data=df_preview.astype(str).to_dict("records"),
    style_table={"overflowX": "auto"},
    style_cell={
        "textAlign": "left",
        "padding": "8px",
        "maxWidth": "250px",
        "whiteSpace": "normal",
    },
    style_header={
        "fontWeight": "bold",
        "backgroundColor": "#f8f9fa",
    },
    page_size=20,
)

# Chart on the left, column list on the right.
overview_row = dbc.Row(
    [
        dbc.Col([dcc.Graph(figure=records_figure)], md=6),
        dbc.Col([html.H4("Colunas disponíveis"), columns_table], md=6),
    ],
    className="mb-4",
)

preview_row = dbc.Row([
    dbc.Col([html.H4("Primeiras 20 linhas"), preview_table])
])

app.layout = dbc.Container(
    [title_row, stats_row, overview_row, preview_row],
    fluid=True,
)

# -----------------------------
# Run the app
# -----------------------------
if __name__ == "__main__":
    # Start the server only when executed as a script: host 0.0.0.0,
    # port 7860, debug mode off.
    app.run(debug=False, port=7860, host="0.0.0.0")