buckeyeguy committed on
Commit
697bdeb
·
verified ·
1 Parent(s): 6625003

v2: update data_loader.py

Browse files
Files changed (1) hide show
  1. data_loader.py +19 -0
data_loader.py CHANGED
@@ -8,6 +8,7 @@ import pandas as pd
8
  import streamlit as st
9
  from huggingface_hub import hf_hub_download
10
 
 
11
  DATASET_REPO = "buckeyeguy/osc-usage-data"
12
 
13
 
@@ -42,6 +43,24 @@ def load_data() -> tuple[pd.DataFrame, pd.DataFrame, dict]:
42
  if "walltime_used" in jobs.columns:
43
  jobs["walltime_hours"] = jobs["walltime_used"] / 3600.0
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  return jobs, snapshots, metadata
46
 
47
 
 
8
  import streamlit as st
9
  from huggingface_hub import hf_hub_download
10
 
11
+
12
  DATASET_REPO = "buckeyeguy/osc-usage-data"
13
 
14
 
 
43
  if "walltime_used" in jobs.columns:
44
  jobs["walltime_hours"] = jobs["walltime_used"] / 3600.0
45
 
46
+ # Timeout classification — interactive vs batch
47
+ if "launch_method" in jobs.columns and "last_state" in jobs.columns:
48
+ import numpy as np
49
+
50
+ from config import INTERACTIVE_METHODS
51
+
52
+ is_timeout = jobs["last_state"] == "TIMEOUT"
53
+ is_interactive = jobs["launch_method"].isin(INTERACTIVE_METHODS)
54
+ jobs["timeout_category"] = np.where(
55
+ ~is_timeout,
56
+ jobs["last_state"],
57
+ np.where(is_interactive, "Interactive Timeout", "Batch Timeout"),
58
+ )
59
+
60
+ # Queue wait time
61
+ if "submit_time" in jobs.columns and "start_time" in jobs.columns:
62
+ jobs["wait_hours"] = (jobs["start_time"] - jobs["submit_time"]).dt.total_seconds() / 3600.0
63
+
64
  return jobs, snapshots, metadata
65
 
66