Spaces:
Sleeping
Sleeping
Upload data_loader.py with huggingface_hub
Browse files
- data_loader.py +30 -7
data_loader.py
CHANGED
|
@@ -43,19 +43,42 @@ def load_data() -> tuple[pd.DataFrame, pd.DataFrame, dict]:
|
|
| 43 |
if "walltime_used" in jobs.columns:
|
| 44 |
jobs["walltime_hours"] = jobs["walltime_used"] / 3600.0
|
| 45 |
|
| 46 |
-
#
|
| 47 |
if "launch_method" in jobs.columns and "last_state" in jobs.columns:
|
| 48 |
import numpy as np
|
| 49 |
|
| 50 |
-
from config import INTERACTIVE_METHODS
|
| 51 |
|
| 52 |
-
is_timeout = jobs["last_state"] == "TIMEOUT"
|
| 53 |
is_interactive = jobs["launch_method"].isin(INTERACTIVE_METHODS)
|
| 54 |
-
jobs
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
# Queue wait time
|
| 61 |
if "submit_time" in jobs.columns and "start_time" in jobs.columns:
|
|
|
|
| 43 |
if "walltime_used" in jobs.columns:
|
| 44 |
jobs["walltime_hours"] = jobs["walltime_used"] / 3600.0
|
| 45 |
|
| 46 |
+
# Behavioral outcome classification
|
| 47 |
if "launch_method" in jobs.columns and "last_state" in jobs.columns:
|
| 48 |
import numpy as np
|
| 49 |
|
| 50 |
+
from config import INTERACTIVE_METHODS, QUICK_EXIT_SECONDS
|
| 51 |
|
|
|
|
| 52 |
is_interactive = jobs["launch_method"].isin(INTERACTIVE_METHODS)
|
| 53 |
+
wt = jobs.get("walltime_used", pd.Series(dtype="float64"))
|
| 54 |
+
state = jobs["last_state"]
|
| 55 |
+
|
| 56 |
+
# Start with batch classification (maps exit state directly)
|
| 57 |
+
outcome = state.map(
|
| 58 |
+
{
|
| 59 |
+
"COMPLETED": "Completed",
|
| 60 |
+
"FAILED": "Failed",
|
| 61 |
+
"TIMEOUT": "Timed Out",
|
| 62 |
+
"OUT_OF_MEMORY": "Out of Memory",
|
| 63 |
+
}
|
| 64 |
+
).fillna("Cancelled") # All CANCELLED variants + NODE_FAIL → "Cancelled"
|
| 65 |
+
|
| 66 |
+
# Override for interactive jobs
|
| 67 |
+
is_quick = is_interactive & (wt < QUICK_EXIT_SECONDS)
|
| 68 |
+
is_failed_interactive = is_interactive & state.isin({"FAILED", "OUT_OF_MEMORY"})
|
| 69 |
+
is_user_ended = (
|
| 70 |
+
is_interactive & ~is_quick & ~is_failed_interactive & state.str.startswith("CANCELLED")
|
| 71 |
)
|
| 72 |
+
is_session_expired = (
|
| 73 |
+
is_interactive & ~is_quick & ~is_failed_interactive & (state == "TIMEOUT")
|
| 74 |
+
)
|
| 75 |
+
|
| 76 |
+
outcome = np.where(is_quick, "Quick Exit", outcome)
|
| 77 |
+
outcome = np.where(is_failed_interactive, "Failed", outcome)
|
| 78 |
+
outcome = np.where(is_user_ended, "User Ended", outcome)
|
| 79 |
+
outcome = np.where(is_session_expired, "Session Expired", outcome)
|
| 80 |
+
|
| 81 |
+
jobs["outcome_category"] = outcome
|
| 82 |
|
| 83 |
# Queue wait time
|
| 84 |
if "submit_time" in jobs.columns and "start_time" in jobs.columns:
|