Spaces:

macrocosm-os
/

Sn25

Sleeping

App Files Files Community

Sarkosos committed on Aug 27, 2024

Commit

14285d3

1 Parent(s): 5144f34

added plots for total jobs done and unique proteins folded

Browse files

Files changed (4) hide show

api.py +3 -3
app.py +41 -24
classes.py +6 -1
utils.py +5 -9

api.py CHANGED Viewed

@@ -8,7 +8,7 @@ import utils
 import pandas as pd
 import uvicorn
-from classes import Productivity, ProductivityData, Throughput
 # Global variables (saves time on loading data)
@@ -64,9 +64,9 @@ def productivity_metrics():
     # Unpack the metrics using the correct keys
     result = utils.get_productivity(df_all=data_all, df_24h=data_24h)
     all_time = ProductivityData(**result['all_time'])
-    last_24h = ProductivityData(**result['last_24h'])
-    return Productivity(all_time=all_time, last_24h=last_24h)
 @app.get("/throughput", response_model=Throughput)

 import pandas as pd
 import uvicorn
+from classes import Productivity, ProductivityData, Last24hProductivityData, Throughput
 # Global variables (saves time on loading data)
     # Unpack the metrics using the correct keys
     result = utils.get_productivity(df_all=data_all, df_24h=data_24h)
     all_time = ProductivityData(**result['all_time'])
+    last_24h = Last24hProductivityData(**result['last_24h'])
+    return {"all_time": all_time, "last_24h": last_24h}
 @app.get("/throughput", response_model=Throughput)

app.py CHANGED Viewed

@@ -31,39 +31,56 @@ st.subheader('Productivity overview')
 st.info('Productivity metrics show how many proteins have been folded, which is the primary goal of the subnet. Metrics are estimated using weights and biases data combined with heuristics.')
 productivity_all = requests.get(f'{BASE_URL}/productivity').json()
-productivity = productivity_all['all_time']
 productivity_24h = productivity_all['last_24h']
-# st.write(productivity_all)
-# # st.write(productivity)
-# st.write(productivity_24h)
 m1, m2 = st.columns(2)
-m1.metric('Unique proteins folded', f'{productivity.get("unique_folded", 0):,.0f}', delta=f'{productivity_24h.get("unique_folded", 0):,.0f} (24h)')
-m2.metric('Total jobs completed', f'{productivity.get("total_completed_jobs", 0):,.0f}', delta=f'{productivity_24h.get("total_completed_jobs", 0):,.0f} (24h)')
-# m3.metric('Total simulation steps', f'{productivity.get("total_md_steps"):,.0f}', delta=f'{productivity_24h.get("total_md_steps"):,.0f} (24h)')
-# st.markdown('<br>', unsafe_allow_html=True)
-# time_binned_data = df.set_index('last_event_at').groupby(pd.Grouper(freq='12h'))
-# PROD_CHOICES = {
-#     'Unique proteins folded': 'unique_pdbs',
-#     'Total simulations': 'total_pdbs',
-#     'Total simulation steps': 'total_md_steps',
-# }
-# prod_choice_label = st.radio('Select productivity metric', list(PROD_CHOICES.keys()), index=0, horizontal=True)
-# prod_choice = PROD_CHOICES[prod_choice_label]
-# steps_running_total = time_binned_data[prod_choice].sum().cumsum()
-# st.plotly_chart(
-#     # add fillgradient to make it easier to see the trend
-#     px.area(steps_running_total, y=prod_choice,
-#             labels={'last_event_at':'', prod_choice: prod_choice_label},
-#     ).update_traces(fill='tozeroy'),
-#     use_container_width=True,
-# )
 st.markdown('<br>', unsafe_allow_html=True)

 st.info('Productivity metrics show how many proteins have been folded, which is the primary goal of the subnet. Metrics are estimated using weights and biases data combined with heuristics.')
 productivity_all = requests.get(f'{BASE_URL}/productivity').json()
+completed_jobs = productivity_all['all_time']['total_completed_jobs']
 productivity_24h = productivity_all['last_24h']
+completed_jobs = pd.DataFrame(completed_jobs)
+completed_jobs['last_event_at'] = pd.to_datetime(completed_jobs['updated_at'])
+unique_folded = completed_jobs.drop_duplicates(subset=['pdb_id'], keep='first')
+unique_folded['last_event_at'] = pd.to_datetime(unique_folded['updated_at'])
 m1, m2 = st.columns(2)
+m1.metric('Unique proteins folded', f'{len(unique_folded):,.0f}', delta=f'{productivity_24h["unique_folded"]:,.0f} (24h)')
+m2.metric('Total jobs completed', f'{len(completed_jobs):,.0f}', delta=f'{productivity_24h["total_completed_jobs"]:,.0f} (24h)')
+st.markdown('<br>', unsafe_allow_html=True)
+# time_binned_data_complete = completed_jobs.set_index('last_event_at').groupby(pd.Grouper(freq='12h'))
+# time_binned_data_unique = unique_folded.set_index('last_event_at').groupby(pd.Grouper(freq='12h'))
+PROD_CHOICES = {
+    'Unique proteins folded': 'unique_pdbs',
+    'Total simulations': 'total_pdbs',
+}
+prod_choice_label = st.radio('Select productivity metric', list(PROD_CHOICES.keys()), index=0, horizontal=True)
+prod_choice = PROD_CHOICES[prod_choice_label]
+PROD_DATA = {
+    'unique_pdbs': unique_folded,
+    'total_pdbs': completed_jobs,
+}
+df = PROD_DATA[prod_choice]
+df = df.sort_values(by='last_event_at').reset_index()
+# Create a cumulative count column
+df['cumulative_jobs'] = df.index + 1
+# Plot the cumulative jobs over time
+st.plotly_chart(
+    # add fillgradient to make it easier to see the trend
+    px.line(df, x='last_event_at', y='cumulative_jobs',
+              title='Total Jobs Completed Over Time',
+              labels={'last_event_at': 'Time', 'cumulative_jobs': 'Total Jobs Completed'}).update_traces(fill='tozeroy'),
+    use_container_width=True,
+)
 st.markdown('<br>', unsafe_allow_html=True)

classes.py CHANGED Viewed

@@ -1,12 +1,17 @@
 from pydantic import BaseModel
 class ProductivityData(BaseModel):
     unique_folded: int
     total_completed_jobs: int
 class Productivity(BaseModel):
     all_time: ProductivityData
-    last_24h: ProductivityData
 class ThroughputData(BaseModel):
     validator_sent: float

 from pydantic import BaseModel
 class ProductivityData(BaseModel):
+    total_completed_jobs: dict[str, dict[int, str]]
+class Last24hProductivityData(BaseModel):
     unique_folded: int
     total_completed_jobs: int
 class Productivity(BaseModel):
     all_time: ProductivityData
+    last_24h: Last24hProductivityData
 class ThroughputData(BaseModel):
     validator_sent: float

utils.py CHANGED Viewed

@@ -164,7 +164,6 @@ def get_data_transferred(df, unit='GB'):
 def get_productivity(df_all, df_24h):
     result = {
         'all_time': {
-            'unique_folded': 0,
             'total_completed_jobs': 0
         },
         'last_24h': {
@@ -173,19 +172,16 @@ def get_productivity(df_all, df_24h):
         }
     }
     if df_all is not None:
-        unique_folded_all = len(df_all.pdb_id.value_counts())
-        completed_jobs_all = len(df_all[df_all.active == False])
-        total_historical_run_updates = df_all.active.isna().sum()
-        total_historical_completed_jobs = total_historical_run_updates//10 # this is an estimate based on minimum number of updates per pdb
         result['all_time'].update({
-            'unique_folded': unique_folded_all,
-            'total_completed_jobs': (completed_jobs_all + total_historical_completed_jobs).item(),
         })
     if df_24h is not None:
-        completed_jobs_24h = df_24h[df_24h['updated_count'] >= 10]
         unique_completed_jobs_24h = completed_jobs_24h.drop_duplicates(subset=['pdb_id'], keep='first')
         result['last_24h'].update({
             'unique_folded': len(unique_completed_jobs_24h),

 def get_productivity(df_all, df_24h):
     result = {
         'all_time': {
             'total_completed_jobs': 0
         },
         'last_24h': {
         }
     }
     if df_all is not None:
+        completed_jobs = df_all[df_all['updated_count'] == 10]
         result['all_time'].update({
+            'total_completed_jobs': completed_jobs[["updated_at", "pdb_id"]].to_dict(),
         })
     if df_24h is not None:
+        completed_jobs_24h = df_24h[df_24h['updated_count'] == 10]
         unique_completed_jobs_24h = completed_jobs_24h.drop_duplicates(subset=['pdb_id'], keep='first')
         result['last_24h'].update({
             'unique_folded': len(unique_completed_jobs_24h),