Spaces:
Paused
Paused
declutter layout
Browse files- app.py +8 -9
- opendashboards/assets/metric.py +12 -13
app.py
CHANGED
|
@@ -21,7 +21,7 @@ from opendashboards.assets import io, inspect, metric, plot
|
|
| 21 |
|
| 22 |
WANDB_PROJECT = "opentensor-dev/alpha-validators"
|
| 23 |
PROJECT_URL = f'https://wandb.ai/{WANDB_PROJECT}/table?workspace=default'
|
| 24 |
-
MAX_RECENT_RUNS =
|
| 25 |
DEFAULT_FILTERS = {}#{"tags": {"$in": [f'1.1.{i}' for i in range(10)]}}
|
| 26 |
DEFAULT_SELECTED_HOTKEYS = None
|
| 27 |
DEFAULT_TASK = 'qa'
|
|
@@ -56,13 +56,6 @@ metric.wandb(df_runs)
|
|
| 56 |
|
| 57 |
# add vertical space
|
| 58 |
st.markdown('#')
|
| 59 |
-
|
| 60 |
-
runid_c1, runid_c2 = st.columns([3, 1])
|
| 61 |
-
# make multiselect for run_ids with label on same line
|
| 62 |
-
run_ids = runid_c1.multiselect('Select one or more weights and biases run by id:', df_runs['run_id'], key='run_id', default=df_runs['run_id'][:3], help=f'Select one or more runs to analyze. You can find the raw data for these runs [here]({PROJECT_URL}).')
|
| 63 |
-
n_runs = len(run_ids)
|
| 64 |
-
df_runs_subset = df_runs[df_runs['run_id'].isin(run_ids)]
|
| 65 |
-
|
| 66 |
st.markdown('#')
|
| 67 |
|
| 68 |
tab1, tab2, tab3, tab4 = st.tabs(["Run Data", "UID Health", "Completions", "Prompt-based scoring"])
|
|
@@ -72,7 +65,13 @@ with tab1:
|
|
| 72 |
|
| 73 |
st.markdown('#')
|
| 74 |
st.subheader(":violet[Run] Data")
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
|
| 77 |
edited_df = st.data_editor(
|
| 78 |
df_runs.assign(Select=False).set_index('Select'),
|
|
|
|
| 21 |
|
| 22 |
WANDB_PROJECT = "opentensor-dev/alpha-validators"
|
| 23 |
PROJECT_URL = f'https://wandb.ai/{WANDB_PROJECT}/table?workspace=default'
|
| 24 |
+
MAX_RECENT_RUNS = 300
|
| 25 |
DEFAULT_FILTERS = {}#{"tags": {"$in": [f'1.1.{i}' for i in range(10)]}}
|
| 26 |
DEFAULT_SELECTED_HOTKEYS = None
|
| 27 |
DEFAULT_TASK = 'qa'
|
|
|
|
| 56 |
|
| 57 |
# add vertical space
|
| 58 |
st.markdown('#')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
st.markdown('#')
|
| 60 |
|
| 61 |
tab1, tab2, tab3, tab4 = st.tabs(["Run Data", "UID Health", "Completions", "Prompt-based scoring"])
|
|
|
|
| 65 |
|
| 66 |
st.markdown('#')
|
| 67 |
st.subheader(":violet[Run] Data")
|
| 68 |
+
|
| 69 |
+
# make multiselect for run_ids with label on same line
|
| 70 |
+
run_ids = st.multiselect('Select one or more weights and biases run by id:', df_runs['run_id'], key='run_id', default=df_runs['run_id'][:3], help=f'Select one or more runs to analyze. You can find the raw data for these runs [here]({PROJECT_URL}).')
|
| 71 |
+
n_runs = len(run_ids)
|
| 72 |
+
df_runs_subset = df_runs[df_runs['run_id'].isin(run_ids)]
|
| 73 |
+
|
| 74 |
+
with st.expander(f'Select from :violet[all] wandb runs'):
|
| 75 |
|
| 76 |
edited_df = st.data_editor(
|
| 77 |
df_runs.assign(Select=False).set_index('Select'),
|
opendashboards/assets/metric.py
CHANGED
|
@@ -28,29 +28,28 @@ def wandb(df_runs):
|
|
| 28 |
col2.metric('Hotkeys', fmt(df_runs.hotkey.nunique()), delta=fmt(df_runs.hotkey.nunique()-df_runs_old.hotkey.nunique())+' (24h)')
|
| 29 |
col3.metric('Events', fmt(df_runs.num_steps.sum()), delta=fmt(df_runs.num_steps.sum()-df_runs_old.num_steps.sum())+' (24h)')
|
| 30 |
col4.metric('Completions', fmt(df_runs.num_completions.sum()), delta=fmt(df_runs.num_completions.sum()-df_runs_old.num_completions.sum())+' (24h)')
|
| 31 |
-
|
| 32 |
st.markdown('----')
|
| 33 |
|
| 34 |
|
| 35 |
@st.cache_data
|
| 36 |
-
def runs(df_long):
|
| 37 |
-
|
| 38 |
col1, col2, col3, col4 = st.columns(4)
|
| 39 |
-
print(df_long.columns)
|
| 40 |
|
| 41 |
# Convert to appropriate units e.g. 1.2k instead of 1200.
|
| 42 |
col1.metric('Runs', fmt(df_long.run_id.nunique()))
|
| 43 |
col2.metric('Hotkeys', fmt(df_long.hotkey.nunique()))
|
| 44 |
col3.metric('Events', fmt(df_long.groupby(['run_id','_step']).ngroups))
|
| 45 |
col4.metric('Completions', fmt(df_long.shape[0]))
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
|
| 55 |
st.markdown('----')
|
| 56 |
|
|
@@ -76,7 +75,7 @@ def uids(df_long, src, uids=None):
|
|
| 76 |
help='Number of unique completions divided by total number of events'
|
| 77 |
)
|
| 78 |
# uniqueness can be expressed as the average number of unique completions per uid divided by all unique completions
|
| 79 |
-
# uniqueness is the shared completions between selected uids
|
| 80 |
|
| 81 |
col3.metric(
|
| 82 |
label="Uniqueness %",
|
|
|
|
| 28 |
col2.metric('Hotkeys', fmt(df_runs.hotkey.nunique()), delta=fmt(df_runs.hotkey.nunique()-df_runs_old.hotkey.nunique())+' (24h)')
|
| 29 |
col3.metric('Events', fmt(df_runs.num_steps.sum()), delta=fmt(df_runs.num_steps.sum()-df_runs_old.num_steps.sum())+' (24h)')
|
| 30 |
col4.metric('Completions', fmt(df_runs.num_completions.sum()), delta=fmt(df_runs.num_completions.sum()-df_runs_old.num_completions.sum())+' (24h)')
|
| 31 |
+
|
| 32 |
st.markdown('----')
|
| 33 |
|
| 34 |
|
| 35 |
@st.cache_data
|
| 36 |
+
def runs(df_long, full=False):
|
| 37 |
+
|
| 38 |
col1, col2, col3, col4 = st.columns(4)
|
|
|
|
| 39 |
|
| 40 |
# Convert to appropriate units e.g. 1.2k instead of 1200.
|
| 41 |
col1.metric('Runs', fmt(df_long.run_id.nunique()))
|
| 42 |
col2.metric('Hotkeys', fmt(df_long.hotkey.nunique()))
|
| 43 |
col3.metric('Events', fmt(df_long.groupby(['run_id','_step']).ngroups))
|
| 44 |
col4.metric('Completions', fmt(df_long.shape[0]))
|
| 45 |
+
|
| 46 |
+
if full:
|
| 47 |
+
aggs = df_long.groupby('task').agg({'uids': 'nunique', 'completions': 'nunique'})
|
| 48 |
+
for i,c in enumerate(st.columns(len(aggs))):
|
| 49 |
+
name = aggs.index[i].title()
|
| 50 |
+
uid_unique, comp_unique = aggs.iloc[i]
|
| 51 |
+
c.metric(label=f'{name} UIDs', value=uid_unique)
|
| 52 |
+
c.metric(label=f'{name} Completions', value=comp_unique)
|
| 53 |
|
| 54 |
st.markdown('----')
|
| 55 |
|
|
|
|
| 75 |
help='Number of unique completions divided by total number of events'
|
| 76 |
)
|
| 77 |
# uniqueness can be expressed as the average number of unique completions per uid divided by all unique completions
|
| 78 |
+
# uniqueness is the shared completions between selected uids
|
| 79 |
|
| 80 |
col3.metric(
|
| 81 |
label="Uniqueness %",
|