Spaces:
Sleeping
Sleeping
Sarkosos
committed on
Commit
·
14285d3
1
Parent(s):
5144f34
added plots for total jobs done and unique proteins folded
Browse files
api.py
CHANGED
|
@@ -8,7 +8,7 @@ import utils
|
|
| 8 |
import pandas as pd
|
| 9 |
import uvicorn
|
| 10 |
|
| 11 |
-
from classes import Productivity, ProductivityData, Throughput
|
| 12 |
|
| 13 |
|
| 14 |
# Global variables (saves time on loading data)
|
|
@@ -64,9 +64,9 @@ def productivity_metrics():
|
|
| 64 |
# Unpack the metrics using the correct keys
|
| 65 |
result = utils.get_productivity(df_all=data_all, df_24h=data_24h)
|
| 66 |
all_time = ProductivityData(**result['all_time'])
|
| 67 |
-
last_24h = ProductivityData(**result['last_24h'])
|
| 68 |
|
| 69 |
-
return
|
| 70 |
|
| 71 |
|
| 72 |
@app.get("/throughput", response_model=Throughput)
|
|
|
|
| 8 |
import pandas as pd
|
| 9 |
import uvicorn
|
| 10 |
|
| 11 |
+
from classes import Productivity, ProductivityData, Last24hProductivityData, Throughput
|
| 12 |
|
| 13 |
|
| 14 |
# Global variables (saves time on loading data)
|
|
|
|
| 64 |
# Unpack the metrics using the correct keys
|
| 65 |
result = utils.get_productivity(df_all=data_all, df_24h=data_24h)
|
| 66 |
all_time = ProductivityData(**result['all_time'])
|
| 67 |
+
last_24h = Last24hProductivityData(**result['last_24h'])
|
| 68 |
|
| 69 |
+
return {"all_time": all_time, "last_24h": last_24h}
|
| 70 |
|
| 71 |
|
| 72 |
@app.get("/throughput", response_model=Throughput)
|
app.py
CHANGED
|
@@ -31,39 +31,56 @@ st.subheader('Productivity overview')
|
|
| 31 |
st.info('Productivity metrics show how many proteins have been folded, which is the primary goal of the subnet. Metrics are estimated using weights and biases data combined with heuristics.')
|
| 32 |
|
| 33 |
productivity_all = requests.get(f'{BASE_URL}/productivity').json()
|
| 34 |
-
|
|
|
|
| 35 |
productivity_24h = productivity_all['last_24h']
|
|
|
|
|
|
|
| 36 |
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
# st.write(productivity_24h)
|
| 40 |
|
| 41 |
m1, m2 = st.columns(2)
|
|
|
|
|
|
|
| 42 |
|
| 43 |
-
|
| 44 |
-
m2.metric('Total jobs completed', f'{productivity.get("total_completed_jobs", 0):,.0f}', delta=f'{productivity_24h.get("total_completed_jobs", 0):,.0f} (24h)')
|
| 45 |
|
| 46 |
-
# m3.metric('Total simulation steps', f'{productivity.get("total_md_steps"):,.0f}', delta=f'{productivity_24h.get("total_md_steps"):,.0f} (24h)')
|
| 47 |
|
| 48 |
-
# st.markdown('<br>', unsafe_allow_html=True)
|
| 49 |
|
| 50 |
-
#
|
|
|
|
| 51 |
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
st.markdown('<br>', unsafe_allow_html=True)
|
| 69 |
|
|
|
|
| 31 |
st.info('Productivity metrics show how many proteins have been folded, which is the primary goal of the subnet. Metrics are estimated using weights and biases data combined with heuristics.')
|
| 32 |
|
| 33 |
productivity_all = requests.get(f'{BASE_URL}/productivity').json()
|
| 34 |
+
completed_jobs = productivity_all['all_time']['total_completed_jobs']
|
| 35 |
+
|
| 36 |
productivity_24h = productivity_all['last_24h']
|
| 37 |
+
completed_jobs = pd.DataFrame(completed_jobs)
|
| 38 |
+
completed_jobs['last_event_at'] = pd.to_datetime(completed_jobs['updated_at'])
|
| 39 |
|
| 40 |
+
unique_folded = completed_jobs.drop_duplicates(subset=['pdb_id'], keep='first')
|
| 41 |
+
unique_folded['last_event_at'] = pd.to_datetime(unique_folded['updated_at'])
|
|
|
|
| 42 |
|
| 43 |
m1, m2 = st.columns(2)
|
| 44 |
+
m1.metric('Unique proteins folded', f'{len(unique_folded):,.0f}', delta=f'{productivity_24h["unique_folded"]:,.0f} (24h)')
|
| 45 |
+
m2.metric('Total jobs completed', f'{len(completed_jobs):,.0f}', delta=f'{productivity_24h["total_completed_jobs"]:,.0f} (24h)')
|
| 46 |
|
| 47 |
+
st.markdown('<br>', unsafe_allow_html=True)
|
|
|
|
| 48 |
|
|
|
|
| 49 |
|
|
|
|
| 50 |
|
| 51 |
+
# time_binned_data_complete = completed_jobs.set_index('last_event_at').groupby(pd.Grouper(freq='12h'))
|
| 52 |
+
# time_binned_data_unique = unique_folded.set_index('last_event_at').groupby(pd.Grouper(freq='12h'))
|
| 53 |
|
| 54 |
+
PROD_CHOICES = {
|
| 55 |
+
'Unique proteins folded': 'unique_pdbs',
|
| 56 |
+
'Total simulations': 'total_pdbs',
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
prod_choice_label = st.radio('Select productivity metric', list(PROD_CHOICES.keys()), index=0, horizontal=True)
|
| 62 |
+
prod_choice = PROD_CHOICES[prod_choice_label]
|
| 63 |
+
|
| 64 |
+
PROD_DATA = {
|
| 65 |
+
'unique_pdbs': unique_folded,
|
| 66 |
+
'total_pdbs': completed_jobs,
|
| 67 |
+
}
|
| 68 |
+
df = PROD_DATA[prod_choice]
|
| 69 |
+
|
| 70 |
+
df = df.sort_values(by='last_event_at').reset_index()
|
| 71 |
+
|
| 72 |
+
# Create a cumulative count column
|
| 73 |
+
df['cumulative_jobs'] = df.index + 1
|
| 74 |
+
|
| 75 |
+
# Plot the cumulative jobs over time
|
| 76 |
+
|
| 77 |
+
st.plotly_chart(
|
| 78 |
+
# add fillgradient to make it easier to see the trend
|
| 79 |
+
px.line(df, x='last_event_at', y='cumulative_jobs',
|
| 80 |
+
title='Total Jobs Completed Over Time',
|
| 81 |
+
labels={'last_event_at': 'Time', 'cumulative_jobs': 'Total Jobs Completed'}).update_traces(fill='tozeroy'),
|
| 82 |
+
use_container_width=True,
|
| 83 |
+
)
|
| 84 |
|
| 85 |
st.markdown('<br>', unsafe_allow_html=True)
|
| 86 |
|
classes.py
CHANGED
|
@@ -1,12 +1,17 @@
|
|
| 1 |
from pydantic import BaseModel
|
| 2 |
|
| 3 |
class ProductivityData(BaseModel):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
unique_folded: int
|
| 5 |
total_completed_jobs: int
|
| 6 |
|
| 7 |
class Productivity(BaseModel):
|
| 8 |
all_time: ProductivityData
|
| 9 |
-
last_24h: ProductivityData
|
| 10 |
|
| 11 |
class ThroughputData(BaseModel):
|
| 12 |
validator_sent: float
|
|
|
|
| 1 |
from pydantic import BaseModel
|
| 2 |
|
| 3 |
class ProductivityData(BaseModel):
|
| 4 |
+
total_completed_jobs: dict[str, dict[int, str]]
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class Last24hProductivityData(BaseModel):
|
| 9 |
unique_folded: int
|
| 10 |
total_completed_jobs: int
|
| 11 |
|
| 12 |
class Productivity(BaseModel):
|
| 13 |
all_time: ProductivityData
|
| 14 |
+
last_24h: Last24hProductivityData
|
| 15 |
|
| 16 |
class ThroughputData(BaseModel):
|
| 17 |
validator_sent: float
|
utils.py
CHANGED
|
@@ -164,7 +164,6 @@ def get_data_transferred(df, unit='GB'):
|
|
| 164 |
def get_productivity(df_all, df_24h):
|
| 165 |
result = {
|
| 166 |
'all_time': {
|
| 167 |
-
'unique_folded': 0,
|
| 168 |
'total_completed_jobs': 0
|
| 169 |
},
|
| 170 |
'last_24h': {
|
|
@@ -173,19 +172,16 @@ def get_productivity(df_all, df_24h):
|
|
| 173 |
}
|
| 174 |
}
|
| 175 |
if df_all is not None:
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
total_historical_run_updates = df_all.active.isna().sum()
|
| 180 |
-
total_historical_completed_jobs = total_historical_run_updates//10 # this is an estimate based on minimum number of updates per pdb
|
| 181 |
|
| 182 |
result['all_time'].update({
|
| 183 |
-
'
|
| 184 |
-
'total_completed_jobs': (completed_jobs_all + total_historical_completed_jobs).item(),
|
| 185 |
})
|
| 186 |
|
| 187 |
if df_24h is not None:
|
| 188 |
-
completed_jobs_24h = df_24h[df_24h['updated_count']
|
| 189 |
unique_completed_jobs_24h = completed_jobs_24h.drop_duplicates(subset=['pdb_id'], keep='first')
|
| 190 |
result['last_24h'].update({
|
| 191 |
'unique_folded': len(unique_completed_jobs_24h),
|
|
|
|
| 164 |
def get_productivity(df_all, df_24h):
|
| 165 |
result = {
|
| 166 |
'all_time': {
|
|
|
|
| 167 |
'total_completed_jobs': 0
|
| 168 |
},
|
| 169 |
'last_24h': {
|
|
|
|
| 172 |
}
|
| 173 |
}
|
| 174 |
if df_all is not None:
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
completed_jobs = df_all[df_all['updated_count'] == 10]
|
|
|
|
|
|
|
| 178 |
|
| 179 |
result['all_time'].update({
|
| 180 |
+
'total_completed_jobs': completed_jobs[["updated_at", "pdb_id"]].to_dict(),
|
|
|
|
| 181 |
})
|
| 182 |
|
| 183 |
if df_24h is not None:
|
| 184 |
+
completed_jobs_24h = df_24h[df_24h['updated_count'] == 10]
|
| 185 |
unique_completed_jobs_24h = completed_jobs_24h.drop_duplicates(subset=['pdb_id'], keep='first')
|
| 186 |
result['last_24h'].update({
|
| 187 |
'unique_folded': len(unique_completed_jobs_24h),
|