Files changed (6) hide show
  1. api.py +85 -51
  2. app.py +58 -77
  3. classes.py +0 -37
  4. ecosystem.config.js +0 -14
  5. requirements.txt +1 -3
  6. utils.py +55 -208
api.py CHANGED
@@ -2,37 +2,26 @@
2
  import atexit
3
  import datetime
4
 
5
- import pandas as pd
6
- import uvicorn
7
  from apscheduler.schedulers.background import BackgroundScheduler
8
- from fastapi import FastAPI
9
 
10
  import utils
11
- from classes import Metagraph, Productivity, Throughput
 
 
12
 
13
  # Global variables (saves time on loading data)
14
  state_vars = None
15
  reload_timestamp = datetime.datetime.now().strftime('%D %T')
16
 
17
- data_all = None
18
- data_30d = None
19
- data_24h = None
20
-
21
- app = FastAPI()
22
 
23
  def load_data():
24
  """
25
  Reload the state variables
26
  """
27
- global data_all, data_30d ,data_24h, reload_timestamp
28
-
29
- utils.fetch_new_runs()
30
-
31
- data_all = utils.preload_data()
32
-
33
- data_30d = data_all[(pd.Timestamp.now() - data_all['updated_at'].apply(lambda x: pd.Timestamp(x)) < pd.Timedelta('30 days'))]
34
-
35
- data_24h = data_all[(pd.Timestamp.now() - data_all['updated_at'].apply(lambda x: pd.Timestamp(x)) < pd.Timedelta('1 days'))]
36
  reload_timestamp = datetime.datetime.now().strftime('%D %T')
37
 
38
  print(f'Reloaded data at {reload_timestamp}')
@@ -47,65 +36,110 @@ def start_scheduler():
47
  atexit.register(lambda: scheduler.shutdown())
48
 
49
 
50
- @app.get("/")
51
  def home():
52
  return "Welcome to the Bittensor Protein Folding Leaderboard API!"
53
 
54
 
55
- @app.get("/updated")
56
  def updated():
57
  return reload_timestamp
58
 
59
 
60
- @app.get("/productivity", response_model=Productivity)
61
- def productivity_metrics():
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  """
63
  Get the productivity metrics
64
  """
65
 
66
- result = utils.get_productivity(df_all=data_all, df_24h=data_24h, df_30d=data_30d)
 
 
 
 
67
 
68
 
69
- return result
70
- @app.get("/metagraph", response_model=Metagraph)
71
- def get_metagraph():
72
  """
73
- Get the metagraph
74
  """
75
-
76
- df_m = utils.get_metagraph()
77
- df_miners = df_m.sort_values('I', ascending=False).reset_index()
78
- incentives = df_miners['I'].astype(float).values
79
- emissions = df_miners['E'].astype(float).values
80
- identities = df_miners['identity']
81
- hotkeys = df_miners['hotkey']
82
- coldkeys = df_miners['coldkey']
83
- trusts = df_miners['trust'].astype(float).values
84
- results = {'incentives': incentives,
85
- 'emissions': emissions,
86
- 'identities': identities,
87
- 'hotkeys': hotkeys,
88
- 'coldkeys': coldkeys,
89
- 'trusts': trusts}
90
- return results
91
-
92
- @app.get("/throughput", response_model=Throughput)
93
- def throughput_metrics():
94
  """
95
- Get the throughput metrics
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  """
97
-
98
- return utils.get_data_transferred(data_all, data_24h)
99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
 
102
  if __name__ == '__main__':
103
 
104
  load_data()
105
-
106
  start_scheduler()
107
 
108
- uvicorn.run(app, host='0.0.0.0', port=5001)
109
 
110
 
111
  # to test locally
 
2
  import atexit
3
  import datetime
4
 
5
+ from flask import Flask, request, jsonify
 
6
  from apscheduler.schedulers.background import BackgroundScheduler
 
7
 
8
  import utils
9
+
10
+ app = Flask(__name__)
11
+
12
 
13
  # Global variables (saves time on loading data)
14
  state_vars = None
15
  reload_timestamp = datetime.datetime.now().strftime('%D %T')
16
 
 
 
 
 
 
17
 
18
  def load_data():
19
  """
20
  Reload the state variables
21
  """
22
+ global state_vars, reload_timestamp
23
+ state_vars = utils.load_state_vars()
24
+
 
 
 
 
 
 
25
  reload_timestamp = datetime.datetime.now().strftime('%D %T')
26
 
27
  print(f'Reloaded data at {reload_timestamp}')
 
36
  atexit.register(lambda: scheduler.shutdown())
37
 
38
 
39
@app.route('/', methods=['GET'])
def home():
    """Return the plain-text welcome banner for the API root."""
    greeting = "Welcome to the Bittensor Protein Folding Leaderboard API!"
    return greeting
42
 
43
 
44
@app.route('/updated', methods=['GET'])
def updated():
    """Return the module-level `reload_timestamp` string."""
    last_reload = reload_timestamp
    return last_reload
47
 
48
 
49
@app.route('/data', methods=['GET'])
@app.route('/data/<period>', methods=['GET'])
def data(period=None):
    """
    Get the cached run data as a list of records.

    Args:
    - period: optional URL segment. '24h' selects state_vars["dataframe_24h"];
      when omitted (None) state_vars["dataframe"] is used.

    Returns:
    - JSON list of dicts (one per dataframe row, every value cast to str),
      or a JSON error object with HTTP status 400 for any other period.
    """
    # Validate explicitly rather than with `assert`: asserts are stripped under
    # `python -O`, and a failed assert reaches the client as a 500, not a 400.
    if period not in ('24h', None):
        return jsonify({'error': f"Invalid period: {period}. Must be '24h' or None."}), 400

    df = state_vars["dataframe_24h"] if period == '24h' else state_vars["dataframe"]
    return jsonify(
        df.astype(str).to_dict(orient='records')
    )
60
+
61
@app.route('/productivity', methods=['GET'])
@app.route('/productivity/<period>', methods=['GET'])
def productivity_metrics(period=None):
    """
    Get the productivity metrics

    Args:
    - period: optional URL segment. '24h' selects state_vars["dataframe_24h"];
      when omitted (None) state_vars["dataframe"] is used.

    Returns:
    - JSON encoding of utils.get_productivity(df) for the selected dataframe,
      or a JSON error object with HTTP status 400 for any other period.
    """
    # Validate explicitly rather than with `assert`: asserts are stripped under
    # `python -O`, and a failed assert reaches the client as a 500, not a 400.
    if period not in ('24h', None):
        return jsonify({'error': f"Invalid period: {period}. Must be '24h' or None."}), 400

    df = state_vars["dataframe_24h"] if period == '24h' else state_vars["dataframe"]
    return jsonify(
        utils.get_productivity(df)
    )
73
 
74
 
75
@app.route('/throughput', methods=['GET'])
@app.route('/throughput/<period>', methods=['GET'])
def throughput_metrics(period=None):
    """
    Get the throughput metrics

    Args:
    - period: optional URL segment. '24h' selects state_vars["dataframe_24h"];
      when omitted (None) state_vars["dataframe"] is used.

    Returns:
    - JSON encoding of utils.get_data_transferred(df) for the selected
      dataframe, or a JSON error object with HTTP status 400 for any other
      period.
    """
    # Validate explicitly rather than with `assert`: asserts are stripped under
    # `python -O`, and a failed assert reaches the client as a 500, not a 400.
    if period not in ('24h', None):
        return jsonify({'error': f"Invalid period: {period}. Must be '24h' or None."}), 400

    df = state_vars["dataframe_24h"] if period == '24h' else state_vars["dataframe"]
    return jsonify(utils.get_data_transferred(df))
84
+
85
+
86
@app.route('/metagraph', methods=['GET'])
def metagraph():
    """
    Serve the cached metagraph table.

    Returns:
    - metagraph_data: JSON list of dicts, one per row of
      state_vars["metagraph"]
    """
    records = state_vars["metagraph"].to_dict(orient='records')
    return jsonify(records)
99
+
100
@app.route('/leaderboard', methods=['GET'])
@app.route('/leaderboard/<entity>', methods=['GET'])
@app.route('/leaderboard/<entity>/<ntop>', methods=['GET'])
def leaderboard(entity='identity',ntop=10):
    """
    Get the leaderboard data

    Args:
    - entity: grouping key; must be one of utils.ENTITY_CHOICES.
    - ntop: number of entries requested. It arrives as a string when taken
      from the URL, so it is converted with int() before use.

    Returns:
    - leaderboard_data: List of dicts (from pandas DataFrame), or a JSON error
      object with HTTP status 400 when `entity` or `ntop` is invalid.
    """
    # Validate explicitly rather than with `assert`: asserts are stripped under
    # `python -O`, and a failed assert reaches the client as a 500, not a 400.
    if entity not in utils.ENTITY_CHOICES:
        return jsonify({'error': f"Invalid entity choice: {entity}"}), 400

    # A non-numeric URL segment (e.g. /leaderboard/hotkey/abc) used to raise an
    # unhandled ValueError; report it as a client error instead.
    try:
        ntop = int(ntop)
    except (TypeError, ValueError):
        return jsonify({'error': f"Invalid ntop: {ntop}. Must be an integer."}), 400

    df_miners = utils.get_leaderboard(
        state_vars["metagraph"],
        ntop=ntop,
        entity_choice=entity
    )

    return jsonify(
        df_miners.to_dict(orient='records')
    )
121
+
122
@app.route('/validator', methods=['GET'])
def validator():
    """
    Serve the metagraph rows whose `validator_trust` is greater than zero.

    Returns:
    - validator_data: JSON list of dicts, one per selected row of
      state_vars["metagraph"]
    """
    metagraph_df = state_vars["metagraph"]
    has_validator_trust = metagraph_df['validator_trust'] > 0
    validator_rows = metagraph_df[has_validator_trust]

    return jsonify(validator_rows.to_dict(orient='records'))
135
 
136
 
137
  if __name__ == '__main__':
138
 
139
  load_data()
 
140
  start_scheduler()
141
 
142
+ app.run(host='0.0.0.0', port=5001, debug=True)
143
 
144
 
145
  # to test locally
app.py CHANGED
@@ -1,9 +1,7 @@
1
  import time
2
-
3
  import pandas as pd
4
- import plotly.express as px
5
- import requests
6
  import streamlit as st
 
7
 
8
  import utils
9
 
@@ -16,26 +14,22 @@ Simulation duration distribution
16
  """
17
 
18
  UPDATE_INTERVAL = 3600
19
- BASE_URL = 'http://143.198.21.86:5001/'
20
 
21
  st.title('Folding Subnet Dashboard')
22
  st.markdown('<br>', unsafe_allow_html=True)
23
 
24
- @st.cache_data(ttl=UPDATE_INTERVAL)
25
- def fetch_productivity_data():
26
- return requests.get(f'{BASE_URL}/productivity').json()
27
-
28
- @st.cache_data(ttl=UPDATE_INTERVAL)
29
- def fetch_throughput_data():
30
- return requests.get(f'{BASE_URL}/throughput').json()
31
 
32
- @st.cache_data(ttl=UPDATE_INTERVAL)
33
- def fetch_metagraph_data():
34
- return utils.get_metagraph()
 
 
 
35
 
36
- @st.cache_data(ttl=UPDATE_INTERVAL)
37
- def fetch_leaderboard_data(df_m, ntop, entity_choice):
38
- return utils.get_leaderboard(df_m, entity_choice=entity_choice)
39
 
40
  #### ------ PRODUCTIVITY ------
41
 
@@ -43,84 +37,68 @@ def fetch_leaderboard_data(df_m, ntop, entity_choice):
43
  st.subheader('Productivity overview')
44
  st.info('Productivity metrics show how many proteins have been folded, which is the primary goal of the subnet. Metrics are estimated using weights and biases data combined with heuristics.')
45
 
46
- productivity_all = fetch_productivity_data()
47
- completed_jobs = productivity_all['all_time']['total_completed_jobs_data']
48
-
49
- productivity_24h = productivity_all['last_24h']
50
- completed_jobs = pd.DataFrame(completed_jobs)
51
 
52
- unique_folded = pd.DataFrame(productivity_all['all_time']['unique_folded_data'])
53
- # unique_folded['last_event_at'] = pd.to_datetime(unique_folded['updated_at'])
54
 
55
  m1, m2, m3 = st.columns(3)
56
- m1.metric('Unique proteins folded', f'{len(unique_folded):,.0f}', delta=f'{productivity_24h["unique_folded"]:,.0f} (24h)')
57
- m2.metric('Total jobs completed', f'{len(completed_jobs):,.0f}', delta=f'{productivity_24h["total_completed_jobs"]:,.0f} (24h)')
58
- m3.metric('Total simulations ran', f'{len(completed_jobs)*10:,.0f}', delta=f'{productivity_24h["total_completed_jobs"]*10:,.0f} (24h)')
 
59
  st.markdown('<br>', unsafe_allow_html=True)
60
 
 
 
61
  PROD_CHOICES = {
62
- 'Total jobs completed': 'total_pdbs',
63
  'Unique proteins folded': 'unique_pdbs',
 
 
64
  }
65
-
66
  prod_choice_label = st.radio('Select productivity metric', list(PROD_CHOICES.keys()), index=0, horizontal=True)
67
  prod_choice = PROD_CHOICES[prod_choice_label]
68
-
69
- PROD_DATA = {
70
- 'unique_pdbs': unique_folded,
71
- 'total_pdbs': completed_jobs,
72
- }
73
- df = PROD_DATA[prod_choice]
74
-
75
- df = df.sort_values(by='last_event_at').reset_index()
76
-
77
- # Create a cumulative count column
78
- df['cumulative_jobs'] = df.index + 1
79
-
80
- # Plot the cumulative jobs over time
81
  st.plotly_chart(
82
- px.line(df, x='last_event_at', y='cumulative_jobs',
83
- labels={'last_event_at': 'Time', 'cumulative_jobs': prod_choice_label}).update_traces(fill='tozeroy'),
 
 
84
  use_container_width=True,
85
  )
86
 
87
  st.markdown('<br>', unsafe_allow_html=True)
88
 
 
89
  #### ------ THROUGHPUT ------
90
  st.subheader('Throughput overview')
91
 
92
  st.info('Throughput metrics show the total amount of data sent and received by the validators. This is a measure of the network activity and the amount of data that is being processed by the subnet.')
93
 
94
  MEM_UNIT = 'GB' #st.radio('Select memory unit', ['TB','GB', 'MB'], index=0, horizontal=True)
95
- throughput = fetch_throughput_data()
96
 
97
- data_transferred = throughput['all_time']
98
- data_transferred_24h = throughput['last_24h']
99
- data_df = pd.DataFrame(throughput['data'])
100
- data_df = data_df.sort_values(by='updated_at').reset_index()
101
- data_df['updated_at'] = pd.to_datetime(data_df['updated_at'])
102
- data_df['Total validator data sent'] = data_df['md_inputs_sum'].cumsum()
103
- data_df['Total received data'] = data_df['md_outputs_sum'].cumsum()
104
 
105
  m1, m2, m3 = st.columns(3)
106
- m1.metric(f'Total validator data sent ({MEM_UNIT})', f'{data_transferred["validator_sent"]:,.0f}', delta=f'{data_transferred_24h["validator_sent"]:,.0f} (24h)')
107
- m2.metric(f'Total received data ({MEM_UNIT})', f'{data_transferred["miner_sent"]:,.0f}', delta=f'{data_transferred_24h["miner_sent"]:,.0f} (24h)')
108
- m3.metric(f'Total transferred data ({MEM_UNIT})', f'{data_transferred["validator_sent"]+data_transferred["miner_sent"]:,.0f}', delta=f'{data_transferred_24h["validator_sent"]+data_transferred_24h["miner_sent"]:,.0f} (24h)')
 
 
 
 
 
109
 
110
  st.plotly_chart(
111
- px.line(data_df, x='updated_at', y=['Total validator data sent', 'Total received data'],
112
- labels={'updated_at':'Time', 'value':f'Data Transferred ({MEM_UNIT})', 'variable':'Direction'},
113
- ).update_traces(fill='tozeroy').update_layout(legend=dict(
114
- yanchor="top",
115
- y=0.99,
116
- xanchor="left",
117
- x=0.01
118
- )),
119
  use_container_width=True,
120
  )
121
 
122
  st.markdown('<br>', unsafe_allow_html=True)
123
 
 
124
  #### ------ LEADERBOARD ------
125
 
126
  st.subheader('Leaderboard')
@@ -129,31 +107,34 @@ m1, m2 = st.columns(2)
129
  ntop = m1.slider('Number of top miners to display', value=10, min_value=3, max_value=50, step=1)
130
  entity_choice = m2.radio('Select entity', utils.ENTITY_CHOICES, index=0, horizontal=True)
131
 
132
- df_m = fetch_metagraph_data()
133
- df_miners = fetch_leaderboard_data(df_m, ntop=ntop, entity_choice=entity_choice)
 
134
  # hide colorbar and don't show y axis
135
  st.plotly_chart(
136
- px.bar(df_miners.iloc[-ntop:], x='I', color='I', hover_name=entity_choice, text=entity_choice if ntop < 20 else None,
137
  labels={'I':'Incentive', 'trust':'Trust', 'stake':'Stake', '_index':'Rank'},
138
  ).update_layout(coloraxis_showscale=False, yaxis_visible=False),
139
  use_container_width=True,
140
  )
141
 
 
142
  with st.expander('Show raw metagraph data'):
143
  st.dataframe(df_m)
144
 
145
  st.markdown('<br>', unsafe_allow_html=True)
146
 
 
147
  #### ------ LOGGED RUNS ------
148
 
149
- # st.subheader('Logged runs')
150
- # st.info('The timeline shows the creation and last event time of each run.')
151
- # st.plotly_chart(
152
- # px.timeline(df, x_start='created_at', x_end='last_event_at', y='username', color='state',
153
- # labels={'created_at':'Created at', 'last_event_at':'Last event at', 'username':''},
154
- # ),
155
- # use_container_width=True
156
- # )
157
-
158
- # with st.expander('Show raw run data'):
159
- # st.dataframe(df)
 
1
  import time
 
2
  import pandas as pd
 
 
3
  import streamlit as st
4
+ import plotly.express as px
5
 
6
  import utils
7
 
 
14
  """
15
 
16
  UPDATE_INTERVAL = 3600
17
+
18
 
19
  st.title('Folding Subnet Dashboard')
20
  st.markdown('<br>', unsafe_allow_html=True)
21
 
22
+ # reload data periodically
23
+ df = utils.build_data(time.time()//UPDATE_INTERVAL)
24
+ st.toast(f'Loaded {len(df)} runs')
 
 
 
 
25
 
26
+ # TODO: fix the factor for 24 hours ago
27
+ runs_alive_24h_ago = (df.last_event_at > pd.Timestamp.now() - pd.Timedelta('1d'))
28
+ df_24h = df.loc[runs_alive_24h_ago]
29
+ # correction factor to account for the fact that the data straddles the 24h boundary
30
+ # correction factor is based on the fraction of the run which occurred in the last 24h
31
+ # factor = (df_24h.last_event_at - pd.Timestamp.now() + pd.Timedelta('1d')) / pd.Timedelta('1d')
32
 
 
 
 
33
 
34
  #### ------ PRODUCTIVITY ------
35
 
 
37
  st.subheader('Productivity overview')
38
  st.info('Productivity metrics show how many proteins have been folded, which is the primary goal of the subnet. Metrics are estimated using weights and biases data combined with heuristics.')
39
 
40
+ productivity = utils.get_productivity(df)
41
+ productivity_24h = utils.get_productivity(df_24h)
 
 
 
42
 
 
 
43
 
44
  m1, m2, m3 = st.columns(3)
45
+ m1.metric('Unique proteins folded', f'{productivity.get("unique_folded"):,.0f}', delta=f'{productivity_24h.get("unique_folded"):,.0f} (24h)')
46
+ m2.metric('Total proteins folded', f'{productivity.get("total_simulations"):,.0f}', delta=f'{productivity_24h.get("total_simulations"):,.0f} (24h)')
47
+ m3.metric('Total simulation steps', f'{productivity.get("total_md_steps"):,.0f}', delta=f'{productivity_24h.get("total_md_steps"):,.0f} (24h)')
48
+
49
  st.markdown('<br>', unsafe_allow_html=True)
50
 
51
+ time_binned_data = df.set_index('last_event_at').groupby(pd.Grouper(freq='12h'))
52
+
53
  PROD_CHOICES = {
 
54
  'Unique proteins folded': 'unique_pdbs',
55
+ 'Total simulations': 'total_pdbs',
56
+ 'Total simulation steps': 'total_md_steps',
57
  }
 
58
  prod_choice_label = st.radio('Select productivity metric', list(PROD_CHOICES.keys()), index=0, horizontal=True)
59
  prod_choice = PROD_CHOICES[prod_choice_label]
60
+ steps_running_total = time_binned_data[prod_choice].sum().cumsum()
 
 
 
 
 
 
 
 
 
 
 
 
61
  st.plotly_chart(
62
+ # add fillgradient to make it easier to see the trend
63
+ px.area(steps_running_total, y=prod_choice,
64
+ labels={'last_event_at':'', prod_choice: prod_choice_label},
65
+ ).update_traces(fill='tozeroy'),
66
  use_container_width=True,
67
  )
68
 
69
  st.markdown('<br>', unsafe_allow_html=True)
70
 
71
+
72
  #### ------ THROUGHPUT ------
73
  st.subheader('Throughput overview')
74
 
75
  st.info('Throughput metrics show the total amount of data sent and received by the validators. This is a measure of the network activity and the amount of data that is being processed by the subnet.')
76
 
77
  MEM_UNIT = 'GB' #st.radio('Select memory unit', ['TB','GB', 'MB'], index=0, horizontal=True)
 
78
 
79
+ data_transferred = utils.get_data_transferred(df,unit=MEM_UNIT)
80
+ data_transferred_24h = utils.get_data_transferred(df_24h, unit=MEM_UNIT)
 
 
 
 
 
81
 
82
  m1, m2, m3 = st.columns(3)
83
+ m1.metric(f'Total sent data ({MEM_UNIT})', f'{data_transferred.get("sent"):,.0f}', delta=f'{data_transferred_24h.get("sent"):,.0f} (24h)')
84
+ m2.metric(f'Total received data ({MEM_UNIT})', f'{data_transferred.get("received"):,.0f}', delta=f'{data_transferred_24h.get("received"):,.0f} (24h)')
85
+ m3.metric(f'Total transferred data ({MEM_UNIT})', f'{data_transferred.get("total"):,.0f}', delta=f'{data_transferred_24h.get("total"):,.0f} (24h)')
86
+
87
+
88
+ IO_CHOICES = {'total_data_sent':'Sent', 'total_data_received':'Received'}
89
+ io_running_total = time_binned_data[list(IO_CHOICES.keys())].sum().rename(columns=IO_CHOICES).cumsum().melt(ignore_index=False)
90
+ io_running_total['value'] = io_running_total['value'].apply(utils.convert_unit, args=(utils.BASE_UNITS, MEM_UNIT))
91
 
92
  st.plotly_chart(
93
+ px.area(io_running_total, y='value', color='variable',
94
+ labels={'last_event_at':'', 'value': f'Data transferred ({MEM_UNIT})', 'variable':'Direction'},
95
+ ),
 
 
 
 
 
96
  use_container_width=True,
97
  )
98
 
99
  st.markdown('<br>', unsafe_allow_html=True)
100
 
101
+
102
  #### ------ LEADERBOARD ------
103
 
104
  st.subheader('Leaderboard')
 
107
  ntop = m1.slider('Number of top miners to display', value=10, min_value=3, max_value=50, step=1)
108
  entity_choice = m2.radio('Select entity', utils.ENTITY_CHOICES, index=0, horizontal=True)
109
 
110
+ df_m = utils.get_metagraph(time.time()//UPDATE_INTERVAL)
111
+ df_miners = utils.get_leaderboard(df_m, ntop=ntop, entity_choice=entity_choice)
112
+
113
  # hide colorbar and don't show y axis
114
  st.plotly_chart(
115
+ px.bar(df_miners, x='I', color='I', hover_name=entity_choice, text=entity_choice if ntop < 20 else None,
116
  labels={'I':'Incentive', 'trust':'Trust', 'stake':'Stake', '_index':'Rank'},
117
  ).update_layout(coloraxis_showscale=False, yaxis_visible=False),
118
  use_container_width=True,
119
  )
120
 
121
+
122
  with st.expander('Show raw metagraph data'):
123
  st.dataframe(df_m)
124
 
125
  st.markdown('<br>', unsafe_allow_html=True)
126
 
127
+
128
  #### ------ LOGGED RUNS ------
129
 
130
+ st.subheader('Logged runs')
131
+ st.info('The timeline shows the creation and last event time of each run.')
132
+ st.plotly_chart(
133
+ px.timeline(df, x_start='created_at', x_end='last_event_at', y='username', color='state',
134
+ labels={'created_at':'Created at', 'last_event_at':'Last event at', 'username':''},
135
+ ),
136
+ use_container_width=True
137
+ )
138
+
139
+ with st.expander('Show raw run data'):
140
+ st.dataframe(df)
classes.py DELETED
@@ -1,37 +0,0 @@
1
- from pydantic import BaseModel
2
- from datetime import datetime
3
- from typing import List
4
-
5
-
6
- class Data(BaseModel):
7
- last_event_at: List[datetime]
8
- cumulative_jobs: List[int]
9
-
10
- class ProductivityData(BaseModel):
11
- unique_folded: int
12
- total_completed_jobs: int
13
- unique_folded_data: Data
14
- total_completed_jobs_data: Data
15
-
16
-
17
- class Productivity(BaseModel):
18
- all_time: ProductivityData
19
- last_24h: ProductivityData
20
- last_30d: ProductivityData
21
-
22
- class ThroughputData(BaseModel):
23
- validator_sent: float
24
- miner_sent: float
25
-
26
- class Throughput(BaseModel):
27
- all_time: ThroughputData
28
- last_24h: ThroughputData
29
- data: dict
30
-
31
- class Metagraph(BaseModel):
32
- incentives: List[float]
33
- emissions: List[float]
34
- identities: List[str]
35
- hotkeys: List[str]
36
- coldkeys: List[str]
37
- trusts: List[float]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ecosystem.config.js DELETED
@@ -1,14 +0,0 @@
1
- module.exports = {
2
- apps: [
3
- {
4
- name: 'hf-dashboard-api',
5
- script: '/home/spunion/Sn25/api.py',
6
- interpreter: '/home/spunion/Sn25/venv/bin/python',
7
- autorestart: true,
8
- watch: false,
9
- env: {
10
- NODE_ENV: 'production',
11
- },
12
- },
13
- ],
14
- };
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -7,7 +7,5 @@ streamlit
7
  nbformat
8
  plotly
9
  pandas
10
- pydantic
11
- fastapi
12
- uvicorn
13
 
 
7
  nbformat
8
  plotly
9
  pandas
10
+ flask
 
 
11
 
utils.py CHANGED
@@ -1,13 +1,12 @@
1
- import json
2
  import os
3
- import time
4
-
5
- import bittensor as bt
6
- import numpy as np
7
- import pandas as pd
8
- import streamlit as st
9
  import tqdm
 
10
  import wandb
 
 
 
 
 
11
  # TODO: Store the runs dataframe (as in sn1 dashboard) and top up with the ones created since the last snapshot
12
  # TODO: Store relevant wandb data in a database for faster access
13
 
@@ -16,7 +15,7 @@ import wandb
16
  MIN_STEPS = 12 # minimum number of steps in wandb run in order to be worth analyzing
17
  MAX_RUNS = 100#0000
18
  NETUID = 25
19
- BASE_PATH = 'macrocosmos/folding-validators' # added historical data from otf wandb and current data
20
  NETWORK = 'finney'
21
  KEYS = None
22
  ABBREV_CHARS = 8
@@ -24,12 +23,7 @@ ENTITY_CHOICES = ('identity', 'hotkey', 'coldkey')
24
 
25
  PDBS_PER_RUN_STEP = 0.083
26
  AVG_MD_STEPS = 30_000
27
- BASE_UNITS = 'GB'
28
- SAVE_PATH = 'current_runs/'
29
- # Check if the directory exists
30
- if not os.path.exists(SAVE_PATH):
31
- # If it doesn't exist, create the directory
32
- os.makedirs(SAVE_PATH)
33
 
34
  api = wandb.Api(timeout=120, api_key='cdcbe340bb7937d3a289d39632491d12b39231b7')
35
 
@@ -53,24 +47,24 @@ EXTRACTORS = {
53
  'run_id': lambda x: x.id,
54
  'user': lambda x: x.user.name[:16],
55
  'username': lambda x: x.user.username[:16],
56
- # 'created_at': lambda x: pd.Timestamp(x.created_at),
57
- 'last_event_at': lambda x: pd.to_datetime(x.summary.get('_timestamp'), errors='coerce'),
58
 
59
  'netuid': lambda x: x.config.get('netuid'),
60
  'mock': lambda x: x.config.get('neuron').get('mock'),
61
  'sample_size': lambda x: x.config.get('neuron').get('sample_size'),
62
  'queue_size': lambda x: x.config.get('neuron').get('queue_size'),
63
  'timeout': lambda x: x.config.get('neuron').get('timeout'),
64
- # 'update_interval': lambda x: x.config.get('neuron').get('update_interval'),
65
  'epoch_length': lambda x: x.config.get('neuron').get('epoch_length'),
66
  'disable_set_weights': lambda x: x.config.get('neuron').get('disable_set_weights'),
67
 
68
  # This stuff is from the last logged event
69
  'num_steps': lambda x: x.summary.get('_step'),
70
- # 'runtime': lambda x: x.summary.get('_runtime'),
71
- # 'init_energy': lambda x: x.summary.get('init_energy'),
72
- # 'best_energy': lambda x: x.summary.get('best_loss'),
73
- # 'pdb_id': lambda x: x.summary.get('pdb_id'),
74
  'pdb_updates': lambda x: x.summary.get('updated_count'),
75
  'total_returned_sizes': lambda x: get_total_file_sizes(x),
76
  'total_sent_sizes': lambda x: get_total_md_input_sizes(x),
@@ -80,12 +74,10 @@ EXTRACTORS = {
80
  'version': lambda x: x.tags[0],
81
  'spec_version': lambda x: x.tags[1],
82
  'vali_hotkey': lambda x: x.tags[2],
83
-
84
  # System metrics
85
  'disk_read': lambda x: x.system_metrics.get('system.disk.in'),
86
  'disk_write': lambda x: x.system_metrics.get('system.disk.out'),
87
- 'network_sent': lambda x: x.system_metrics.get('system.network.sent'),
88
- 'network_recv': lambda x: x.system_metrics.get('system.network.recv'),
89
  # Really slow stuff below
90
  # 'started_at': lambda x: x.metadata.get('startedAt'),
91
  # 'disk_used': lambda x: x.metadata.get('disk').get('/').get('used'),
@@ -142,189 +134,44 @@ def get_total_md_input_sizes(run):
142
  return convert_unit(size_bytes, from_unit='B', to_unit=BASE_UNITS)
143
 
144
 
 
145
 
146
-
147
- def get_data_transferred(df, df_24h, unit='GB'):
148
- def safe_json_loads(x):
149
- try:
150
- return json.loads(x)
151
- except ValueError:
152
- return []
153
- def np_sum(x):
154
- try:
155
- # Flatten the list of lists and convert it to a NumPy array
156
- flat_array = np.array([item for sublist in x for item in sublist])
157
-
158
- # Use np.sum() to sum all elements in the flattened array
159
- total_sum = np.sum(flat_array)
160
- return total_sum
161
- except TypeError:
162
- return 0
163
- df = df.dropna(subset=['md_inputs_sizes', 'response_returned_files_sizes'])
164
- df['md_inputs_sizes'] = df.md_inputs_sizes.apply(safe_json_loads)
165
- df['response_returned_files_sizes'] = df.response_returned_files_sizes.apply(safe_json_loads)
166
- df['md_inputs_sum'] = df.md_inputs_sizes.apply(np.sum)
167
- df['md_outputs_sum'] = df.response_returned_files_sizes.apply(np_sum)
168
- df['md_inputs_sum'] = df['md_inputs_sum'].apply(convert_unit, from_unit='B', to_unit=BASE_UNITS)
169
- df['md_outputs_sum'] = df['md_outputs_sum'].apply(convert_unit, from_unit='B', to_unit=BASE_UNITS)
170
-
171
- df_24h = df_24h.dropna(subset=['md_inputs_sizes', 'response_returned_files_sizes'])
172
- df_24h['md_inputs_sizes'] = df_24h.md_inputs_sizes.apply(safe_json_loads)
173
- df_24h['response_returned_files_sizes'] = df_24h.response_returned_files_sizes.apply(safe_json_loads)
174
- df_24h['md_inputs_sum'] = df_24h.md_inputs_sizes.apply(np.sum)
175
- df_24h['md_outputs_sum'] = df_24h.response_returned_files_sizes.apply(np_sum)
176
-
177
-
178
- validator_sent = np.nansum(df['md_inputs_sum'].values)
179
- miner_sent = np.nansum(df['md_outputs_sum'].values)
180
- validator_sent_24h = np.nansum(df_24h['md_inputs_sum'].values)
181
- miner_sent_24h = np.nansum(df_24h['md_outputs_sum'].values)
182
-
183
- return {'all_time': {
184
- 'validator_sent': validator_sent,
185
- 'miner_sent': miner_sent,
186
- },
187
- 'last_24h': {
188
- 'validator_sent': convert_unit(validator_sent_24h, from_unit='B', to_unit=BASE_UNITS),
189
- 'miner_sent': convert_unit(miner_sent_24h, from_unit='B', to_unit=BASE_UNITS),
190
- },
191
- 'data': df[['md_inputs_sum', 'md_outputs_sum', 'updated_at']].to_dict()
192
- }
193
-
194
- def calculate_productivity_data(df):
195
- completed_jobs = df[df['updated_count'] == 10]
196
- completed_jobs['last_event_at'] = pd.to_datetime(completed_jobs['updated_at'])
197
- unique_folded = completed_jobs.drop_duplicates(subset=['pdb_id'], keep='first')
198
- completed_jobs = completed_jobs.sort_values(by='last_event_at').reset_index()
199
- completed_jobs['cumulative_jobs'] = completed_jobs.index + 1
200
- unique_folded = unique_folded.sort_values(by='last_event_at').reset_index()
201
- unique_folded['cumulative_jobs'] = unique_folded.index + 1
202
  return {
203
- 'unique_folded': len(unique_folded),
204
- 'total_completed_jobs': len(completed_jobs),
205
- 'unique_folded_data': {'last_event_at': unique_folded['last_event_at'].dt.to_pydatetime(), 'cumulative_jobs':unique_folded['cumulative_jobs'].values},
206
- 'total_completed_jobs_data': {'last_event_at': completed_jobs['last_event_at'].dt.to_pydatetime(), 'cumulative_jobs':completed_jobs['cumulative_jobs'].values}
207
- }
208
-
209
- def get_productivity(df_all, df_24h, df_30d):
210
- result = {
211
- 'all_time': {
212
- 'unique_folded': 0,
213
- 'total_completed_jobs': 0,
214
- 'unique_folded_data': {},
215
- 'total_completed_jobs_data': {}
216
- },
217
- 'last_24h': {
218
- 'unique_folded': 0,
219
- 'total_completed_jobs': 0,
220
- "unique_folded_data": {},
221
- 'total_completed_jobs_data': {}
222
- },
223
- 'last_30d': {
224
- 'unique_folded': 0,
225
- 'total_completed_jobs': 0,
226
- "unique_folded_data": {},
227
- 'total_completed_jobs_data': {}
228
  }
229
- }
230
 
231
 
 
232
 
233
- if df_all is not None:
234
- result['all_time'].update(calculate_productivity_data(df_all))
235
-
236
- if df_24h is not None:
237
- result['last_24h'].update(calculate_productivity_data(df_24h))
238
-
239
- if df_30d is not None:
240
- result['last_30d'].update(calculate_productivity_data(df_30d))
241
- return result
242
 
243
- def get_leaderboard(df, entity_choice='identity'):
 
 
 
 
 
 
244
 
245
  df = df.loc[df.validator_permit==False]
246
  df.index = range(df.shape[0])
247
- return df.groupby(entity_choice).I.sum().sort_values().reset_index()
248
-
249
-
250
-
251
- def fetch_new_runs(base_path: str = BASE_PATH , netuid: int = 25, min_steps: int = 10, save_path: str= SAVE_PATH, extractors: dict = EXTRACTORS):
252
- runs_checker = pd.read_csv('runs_checker.csv')
253
- current_time = pd.to_datetime(time.time(), unit='s')
254
- current_time_str = current_time.strftime('%y-%m-%d') # Format as 'YYYYMMDD'
255
- new_ticker = runs_checker.check_ticker.max() + 1
256
-
257
- new_rows_list = []
258
-
259
- # update runs list based on all current runs running
260
- for run in api.runs(base_path):
261
- num_steps = run.summary.get('_step')
262
-
263
- if run.config.get('netuid') != netuid:
264
- continue
265
-
266
- if num_steps is None or num_steps < min_steps:
267
- continue
268
-
269
- if run.state =='running':
270
- new_rows_list.append({
271
- 'run_id': run.id,
272
- 'state': run.state,
273
- 'step': num_steps,
274
- 'check_time': current_time,
275
- 'check_ticker': new_ticker,
276
- 'user': run.user.name[:16],
277
- 'username': run.user.username[:16]
278
- })
279
- if new_rows_list:
280
- new_rows_df = pd.DataFrame(new_rows_list)
281
- runs_checker= pd.concat([runs_checker, new_rows_df], ignore_index=True)
282
- # save
283
- runs_checker.to_csv('runs_checker.csv', index=False)
284
-
285
- bt.logging.info(f'Cross checking runs for ticker {new_ticker} against previous ticker')
286
- previous_check = runs_checker[runs_checker.check_ticker==new_ticker - 1]
287
- current_check = runs_checker[runs_checker.check_ticker == new_ticker]
288
-
289
- # save ended runs from last check
290
- for run_id in previous_check.run_id:
291
- if run_id not in current_check.run_id:
292
-
293
- frame = load_run(f'{base_path}/{run_id}', extractors=EXTRACTORS)
294
-
295
- csv_path = os.path.join(save_path, f"{run_id}.csv")
296
- frame.to_csv(csv_path)
297
-
298
- # save new runs
299
- for run in api.runs(base_path):
300
- if run.config.get('netuid') != netuid:
301
- continue
302
- num_steps = run.summary.get('_step')
303
- if num_steps is None or num_steps < min_steps:
304
- continue
305
- if run.state =='running':
306
- frame = load_run(run_path='/'.join(run.path), extractors=EXTRACTORS)
307
- csv_path = os.path.join(save_path, f"{run.id}.csv")
308
- frame.to_csv(csv_path)
309
-
310
-
311
- def preload_data():
312
- # save all the paths of files to a list in a directory
313
- paths_list = []
314
- for path in os.listdir(SAVE_PATH):
315
- paths_list.append(os.path.join(SAVE_PATH, path))
316
-
317
- df_list = []
318
-
319
- for path in paths_list:
320
- df = pd.read_csv(path,low_memory=False)
321
- df_list.append(df)
322
-
323
- combined_df = pd.concat(df_list, ignore_index=True)
324
- return combined_df
325
 
326
  @st.cache_data()
327
- def get_metagraph():
 
328
  subtensor = bt.subtensor(network=NETWORK)
329
  m = subtensor.metagraph(netuid=NETUID)
330
  meta_cols = ['I','stake','trust','validator_trust','validator_permit','C','R','E','dividends','last_update']
@@ -341,26 +188,20 @@ def get_metagraph():
341
  return df_m
342
 
343
 
344
- def load_run(run_path: str, extractors: dict):
 
 
345
  print('Loading run:', run_path)
346
  run = api.run(run_path)
347
- df = pd.DataFrame(list(run.scan_history()))
348
-
349
  for col in ['updated_at', 'best_loss_at', 'created_at']:
350
  if col in df.columns:
351
  df[col] = pd.to_datetime(df[col])
352
- num_rows=len(df)
353
-
354
- extractor_df = {key: func(run) for key, func in extractors.items()}
355
- repeated_data = {key: [value] * num_rows for key, value in extractor_df.items()}
356
- extractor_df = pd.DataFrame(repeated_data)
357
-
358
- combined_df = pd.concat([df, extractor_df], axis=1)
359
-
360
- return combined_df
361
 
362
  @st.cache_data(show_spinner=False)
363
- def build_data(timestamp=None, paths=BASE_PATH, min_steps=MIN_STEPS, use_cache=True):
364
 
365
  save_path = '_saved_runs.csv'
366
  filters = {}
@@ -431,4 +272,10 @@ def load_state_vars():
431
  }
432
 
433
 
 
 
 
 
434
 
 
 
 
 
1
  import os
 
 
 
 
 
 
2
  import tqdm
3
+ import time
4
  import wandb
5
+ import streamlit as st
6
+ import pandas as pd
7
+ import bittensor as bt
8
+
9
+
10
  # TODO: Store the runs dataframe (as in sn1 dashboard) and top up with the ones created since the last snapshot
11
  # TODO: Store relevant wandb data in a database for faster access
12
 
 
15
  MIN_STEPS = 12 # minimum number of steps in wandb run in order to be worth analyzing
16
  MAX_RUNS = 100#0000
17
  NETUID = 25
18
+ BASE_PATHS = ['macrocosmos/folding-validators--moved', 'macrocosmos/folding-validators'] # added historical data from otf wandb and current data
19
  NETWORK = 'finney'
20
  KEYS = None
21
  ABBREV_CHARS = 8
 
23
 
24
  PDBS_PER_RUN_STEP = 0.083
25
  AVG_MD_STEPS = 30_000
26
+ BASE_UNITS = 'MB'
 
 
 
 
 
27
 
28
  api = wandb.Api(timeout=120, api_key='cdcbe340bb7937d3a289d39632491d12b39231b7')
29
 
 
47
  'run_id': lambda x: x.id,
48
  'user': lambda x: x.user.name[:16],
49
  'username': lambda x: x.user.username[:16],
50
+ 'created_at': lambda x: pd.Timestamp(x.created_at),
51
+ 'last_event_at': lambda x: pd.Timestamp(x.summary.get('_timestamp'), unit='s'),
52
 
53
  'netuid': lambda x: x.config.get('netuid'),
54
  'mock': lambda x: x.config.get('neuron').get('mock'),
55
  'sample_size': lambda x: x.config.get('neuron').get('sample_size'),
56
  'queue_size': lambda x: x.config.get('neuron').get('queue_size'),
57
  'timeout': lambda x: x.config.get('neuron').get('timeout'),
58
+ 'update_interval': lambda x: x.config.get('neuron').get('update_interval'),
59
  'epoch_length': lambda x: x.config.get('neuron').get('epoch_length'),
60
  'disable_set_weights': lambda x: x.config.get('neuron').get('disable_set_weights'),
61
 
62
  # This stuff is from the last logged event
63
  'num_steps': lambda x: x.summary.get('_step'),
64
+ 'runtime': lambda x: x.summary.get('_runtime'),
65
+ 'init_energy': lambda x: x.summary.get('init_energy'),
66
+ 'best_energy': lambda x: x.summary.get('best_loss'),
67
+ 'pdb_id': lambda x: x.summary.get('pdb_id'),
68
  'pdb_updates': lambda x: x.summary.get('updated_count'),
69
  'total_returned_sizes': lambda x: get_total_file_sizes(x),
70
  'total_sent_sizes': lambda x: get_total_md_input_sizes(x),
 
74
  'version': lambda x: x.tags[0],
75
  'spec_version': lambda x: x.tags[1],
76
  'vali_hotkey': lambda x: x.tags[2],
77
+
78
  # System metrics
79
  'disk_read': lambda x: x.system_metrics.get('system.disk.in'),
80
  'disk_write': lambda x: x.system_metrics.get('system.disk.out'),
 
 
81
  # Really slow stuff below
82
  # 'started_at': lambda x: x.metadata.get('startedAt'),
83
  # 'disk_used': lambda x: x.metadata.get('disk').get('/').get('used'),
 
134
  return convert_unit(size_bytes, from_unit='B', to_unit=BASE_UNITS)
135
 
136
 
137
def get_data_transferred(df, unit='GB'):
    """Summarise data transfer and disk I/O totals, expressed in `unit`.

    Args:
        df: DataFrame with `total_data_sent`, `total_data_received`,
            `disk_read` and `disk_write` columns.
            NOTE(review): all four are scaled as if stored in BASE_UNITS;
            confirm this holds for the disk columns.
        unit: Target unit name passed to `convert_unit` (default 'GB').

    Returns:
        dict with keys 'sent', 'received', 'total', 'read' and 'write'.
    """
    # Multiplier that turns a BASE_UNITS quantity into `unit`.
    scale = convert_unit(1, from_unit=BASE_UNITS, to_unit=unit)

    sent_total = df.total_data_sent.sum()
    received_total = df.total_data_received.sum()

    summary = {}
    summary['sent'] = sent_total * scale
    summary['received'] = received_total * scale
    summary['total'] = (sent_total + received_total) * scale
    summary['read'] = df.disk_read.sum() * scale
    summary['write'] = df.disk_write.sum() * scale
    return summary
 
149
 
150
 
151
def get_productivity(df):
    """Return rounded productivity totals estimated from per-run heuristics.

    Args:
        df: DataFrame with `unique_pdbs`, `total_pdbs` and `total_md_steps`
            columns (per-run estimates).

    Returns:
        dict with float values for:
            'unique_folded': rounded sum of `unique_pdbs`.
            'total_simulations': rounded sum of `total_pdbs`.
            'total_md_steps': rounded sum of `total_md_steps`.
    """
    import math

    def _rounded_total(column):
        # pandas can return a plain Python int (e.g. 0 for an empty
        # object-dtype column), which has no `.round()` method; normalising
        # through float() handles numpy scalars and builtin numbers alike.
        total = float(column.sum())
        # round() rejects inf/nan, so pass non-finite totals through untouched.
        return float(round(total)) if math.isfinite(total) else total

    return {
        # Estimate the number of unique pdbs folded using our heuristic
        'unique_folded': _rounded_total(df.unique_pdbs),
        # Estimate the total number of simulations completed using our heuristic
        'total_simulations': _rounded_total(df.total_pdbs),
        # Estimate the total number of simulation steps completed using our heuristic
        'total_md_steps': _rounded_total(df.total_md_steps),
    }
165
+
166
def get_leaderboard(df, ntop=10, entity_choice='identity'):
    """Rank non-validator entities by their summed `I` value.

    Args:
        df: DataFrame with `validator_permit`, `I` and `entity_choice` columns.
        ntop: Number of top entities to keep.
        entity_choice: Column to group rows by (default 'identity').

    Returns:
        DataFrame with columns [`entity_choice`, 'I'], sorted ascending by
        'I' and holding the last `ntop` rows (highest totals at the bottom).
    """
    # Keep only rows without a validator permit. The explicit `== False`
    # comparison is retained so behaviour matches for any column dtype.
    miners = df.loc[df.validator_permit == False]
    miners.index = range(miners.shape[0])

    totals = miners.groupby(entity_choice).I.sum()
    ranked = totals.sort_values().reset_index()
    return ranked.tail(ntop)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
  @st.cache_data()
173
+ def get_metagraph(time):
174
+ print(f'Loading metagraph with time {time}')
175
  subtensor = bt.subtensor(network=NETWORK)
176
  m = subtensor.metagraph(netuid=NETUID)
177
  meta_cols = ['I','stake','trust','validator_trust','validator_permit','C','R','E','dividends','last_update']
 
188
  return df_m
189
 
190
 
191
@st.cache_data()
def load_run(run_path, keys=KEYS):
    """Download the logged history of one wandb run as a DataFrame.

    Args:
        run_path: Path of the run, passed to `api.run`.
        keys: Forwarded to `run.scan_history(keys=...)`; defaults to KEYS.

    Returns:
        DataFrame with one row per history record. The 'updated_at',
        'best_loss_at' and 'created_at' columns are converted with
        `pd.to_datetime` when present.
    """
    print('Loading run:', run_path)
    history = api.run(run_path).scan_history(keys=keys)
    df = pd.DataFrame(list(history))

    datetime_columns = ('updated_at', 'best_loss_at', 'created_at')
    for name in datetime_columns:
        if name not in df.columns:
            continue
        df[name] = pd.to_datetime(df[name])

    print(f'+ Loaded {len(df)} records')
    return df
 
 
 
 
 
 
 
202
 
203
  @st.cache_data(show_spinner=False)
204
+ def build_data(timestamp=None, paths=BASE_PATHS, min_steps=MIN_STEPS, use_cache=True):
205
 
206
  save_path = '_saved_runs.csv'
207
  filters = {}
 
272
  }
273
 
274
 
275
if __name__ == '__main__':
    # Manual smoke test: load the runs, dump them to CSV, then print them.
    print('Loading runs')
    df = load_runs()

    output_csv = 'test_wandb_data.csv'
    df.to_csv(output_csv, index=False)
    print(df)