Spaces:
Running
Running
v2: update data_loader.py
Browse files
- data_loader.py +19 -0
data_loader.py
CHANGED
|
@@ -8,6 +8,7 @@ import pandas as pd
|
|
| 8 |
import streamlit as st
|
| 9 |
from huggingface_hub import hf_hub_download
|
| 10 |
|
|
|
|
| 11 |
DATASET_REPO = "buckeyeguy/osc-usage-data"
|
| 12 |
|
| 13 |
|
|
@@ -42,6 +43,24 @@ def load_data() -> tuple[pd.DataFrame, pd.DataFrame, dict]:
|
|
| 42 |
if "walltime_used" in jobs.columns:
|
| 43 |
jobs["walltime_hours"] = jobs["walltime_used"] / 3600.0
|
| 44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
return jobs, snapshots, metadata
|
| 46 |
|
| 47 |
|
|
|
|
| 8 |
import streamlit as st
|
| 9 |
from huggingface_hub import hf_hub_download
|
| 10 |
|
| 11 |
+
|
| 12 |
DATASET_REPO = "buckeyeguy/osc-usage-data"
|
| 13 |
|
| 14 |
|
|
|
|
| 43 |
if "walltime_used" in jobs.columns:
|
| 44 |
jobs["walltime_hours"] = jobs["walltime_used"] / 3600.0
|
| 45 |
|
| 46 |
+
# Timeout classification — interactive vs batch
|
| 47 |
+
if "launch_method" in jobs.columns and "last_state" in jobs.columns:
|
| 48 |
+
import numpy as np
|
| 49 |
+
|
| 50 |
+
from config import INTERACTIVE_METHODS
|
| 51 |
+
|
| 52 |
+
is_timeout = jobs["last_state"] == "TIMEOUT"
|
| 53 |
+
is_interactive = jobs["launch_method"].isin(INTERACTIVE_METHODS)
|
| 54 |
+
jobs["timeout_category"] = np.where(
|
| 55 |
+
~is_timeout,
|
| 56 |
+
jobs["last_state"],
|
| 57 |
+
np.where(is_interactive, "Interactive Timeout", "Batch Timeout"),
|
| 58 |
+
)
|
| 59 |
+
|
| 60 |
+
# Queue wait time
|
| 61 |
+
if "submit_time" in jobs.columns and "start_time" in jobs.columns:
|
| 62 |
+
jobs["wait_hours"] = (jobs["start_time"] - jobs["submit_time"]).dt.total_seconds() / 3600.0
|
| 63 |
+
|
| 64 |
return jobs, snapshots, metadata
|
| 65 |
|
| 66 |
|