# saraht14's picture
# write update REMOVE BEFORE DEPLOY
# 0c0937c verified
# change the eval ftn to take a list of lists
import gradio as gr
import pandas as pd
import time
import torch
import os
import torchvision.transforms as transforms
from torchvision import datasets
import torch.nn.functional as F
from torch.utils.data import DataLoader
import subprocess
import zipfile
import shutil
import numpy as np
import importlib.util
import inspect
from huggingface_hub import HfApi
from datasets import load_dataset, Dataset
from huggingface_hub import login, hf_hub_download
import requests
import matplotlib
from tabulate import tabulate
from datasets import Features, Value
from datasets import Features, Value, DatasetDict
import random
from datetime import datetime
import tempfile
import json
import sys
import psutil # to monitor cpu usage
import os
import os, time, psutil, shutil
def safe(callable_, default=None):
    """Invoke *callable_* with no arguments; return *default* on any exception."""
    try:
        result = callable_()
    except Exception:
        return default
    return result
def effective_cpu_count():
    """Return the CPU count available to this container.

    Honors the cgroup-v2 quota in /sys/fs/cgroup/cpu.max when present;
    otherwise (or on any read/parse failure) falls back to os.cpu_count().
    """
    fallback = float(os.cpu_count() or 1)
    try:
        with open("/sys/fs/cgroup/cpu.max", "r") as f:
            quota, period = f.read().strip().split()
        if quota == "max":
            return fallback
        return float(quota) / float(period)
    except Exception:
        return fallback
def read_cgroup_memory():
    """Best-effort cgroup-v2 memory stats; returns {} when files are unavailable.

    Keys: "mem_current" (bytes in use) and "mem_max" (limit in bytes, or None
    when the limit is the literal string "max").
    """
    stats = {}
    try:
        with open("/sys/fs/cgroup/memory.current") as f:
            stats["mem_current"] = int(f.read().strip())
        with open("/sys/fs/cgroup/memory.max") as f:
            raw_max = f.read().strip()
        stats["mem_max"] = None if raw_max == "max" else int(raw_max)
    except Exception:
        pass
    return stats
def read_cgroup_cpu_stat():
    """Parse /sys/fs/cgroup/cpu.stat into {name: int}; returns {} on failure.

    A parse error mid-file leaves the already-collected entries in place,
    matching the original best-effort behavior.
    """
    stats = {}
    try:
        with open("/sys/fs/cgroup/cpu.stat") as f:
            entries = f.read().strip().splitlines()
        for entry in entries:
            key, value = entry.split()
            stats[key] = int(value)
    except Exception:
        pass
    return stats
def proc_tree(proot: psutil.Process):
    """Return *proot* plus all recursive children that are still running."""
    descendants = safe(lambda: proot.children(recursive=True), [])
    candidates = [proot] + descendants
    return [proc for proc in candidates if safe(proc.is_running, False)]
def snapshot_resources(tag="[snap]", root_pid=None, interval=1.0, watch_dir=None):
    """Sample CPU/memory/IO usage for a process tree plus host/cgroup context.

    Sleeps *interval* seconds (to prime psutil's per-process cpu_percent),
    then aggregates stats over the tree rooted at *root_pid* (default: the
    current process). When *watch_dir* is given, disk usage of that path's
    filesystem is included. Prints a one-line summary and returns the full
    stats dict.
    """
    proot = psutil.Process(root_pid) if root_pid else psutil.Process()
    eff_cpus = effective_cpu_count()
    # warm cpu% (per-process cpu_percent needs priming)
    for p in proc_tree(proot):
        safe(lambda p=p: p.cpu_percent(interval=None))
    # sleep for interval to measure rates
    time.sleep(interval)
    procs = proc_tree(proot)
    # aggregate CPU time, RSS, IO, threads, fds across the whole tree
    cpu_time = 0.0
    rss = 0
    rbytes = 0
    wbytes = 0
    threads = 0
    fds = 0
    open_files = 0
    for p in procs:
        ct = safe(p.cpu_times)
        if ct:
            cpu_time += (ct.user + ct.system)
        mi = safe(p.memory_info)
        if mi:
            rss += mi.rss
        io = safe(p.io_counters)
        if io:
            rbytes += getattr(io, "read_bytes", 0)
            wbytes += getattr(io, "write_bytes", 0)
        threads += (safe(p.num_threads, 0) or 0)
        fds += (safe(p.num_fds, 0) or 0)  # linux only
        of = safe(p.open_files, [])  # can be expensive on some systems
        open_files += len(of) if of else 0
    # host/system context (noisy but useful)
    host_cpu = safe(lambda: psutil.cpu_percent(interval=None))
    host_mem = safe(lambda: psutil.virtual_memory().percent)
    cpu_times_pct = safe(lambda: psutil.cpu_times_percent(interval=None, percpu=False))
    iowait = getattr(cpu_times_pct, "iowait", None) if cpu_times_pct else None
    loadavg = os.getloadavg() if hasattr(os, "getloadavg") else None
    disk = safe(lambda: psutil.disk_io_counters())
    disk_read = getattr(disk, "read_bytes", None) if disk else None
    disk_write = getattr(disk, "write_bytes", None) if disk else None
    # directory usage (good for sandbox scratch growth)
    dir_used = None
    if watch_dir:
        du = safe(lambda: shutil.disk_usage(watch_dir))
        if du:
            dir_used = du.used
    cg_mem = read_cgroup_memory()
    cg_cpu = read_cgroup_cpu_stat()
    out = {
        "tag": tag,
        "n_procs_tree": len(procs),
        "eff_cpus": eff_cpus,
        "host_cpu_percent": host_cpu,
        "host_mem_percent": host_mem,
        "host_iowait_percent": iowait,
        "host_loadavg": loadavg,
        "rss_MB_tree": rss / (1024**2),
        "read_MB_tree": rbytes / (1024**2),
        "write_MB_tree": wbytes / (1024**2),
        "threads_tree": threads,
        "fds_tree": fds,
        "open_files_tree": open_files,
        "disk_read_bytes_host": disk_read,
        "disk_write_bytes_host": disk_write,
        "watch_dir_used_MB": (dir_used / (1024**2)) if dir_used is not None else None,
        "cgroup_mem_current_MB": (cg_mem.get("mem_current", 0) / (1024**2)) if cg_mem else None,
        "cgroup_mem_max_MB": (cg_mem.get("mem_max", 0) / (1024**2)) if cg_mem and cg_mem.get("mem_max") else None,
        "cgroup_cpu_stat": cg_cpu if cg_cpu else None,
    }
    # one-line log
    print(
        f"{tag} procs={out['n_procs_tree']} effCPU={eff_cpus:.2f} "
        f"hostCPU={host_cpu:.1f}% mem={host_mem:.1f}% iowait={iowait} "
        f"RSS={out['rss_MB_tree']:.1f}MB R={out['read_MB_tree']:.1f}MB W={out['write_MB_tree']:.1f}MB "
        f"thr={threads} fds={fds} dirUsedMB={out['watch_dir_used_MB']}"
    )
    return out
def monitor_system(prefix="", seconds=10):
    """Print host cpu%, mem%, and load average once per second for *seconds* seconds."""
    for _ in range(seconds):
        usage = psutil.cpu_percent(interval=1)
        mem_pct = psutil.virtual_memory().percent
        load = os.getloadavg() if hasattr(os, "getloadavg") else None
        print(f"{prefix} cpu%={usage:.1f} mem%={mem_pct:.1f} loadavg={load}")
def print_thread_env(tag):
    """Dump the thread-limiting environment variables under a *tag* banner."""
    print(f"\n===== {tag} =====")
    env_keys = (
        "LOKY_MAX_CPU_COUNT",
        "OMP_NUM_THREADS",
        "MKL_NUM_THREADS",
        "OPENBLAS_NUM_THREADS",
        "NUMEXPR_NUM_THREADS",
    )
    for key in env_keys:
        print(f"{key} =", os.environ.get(key))
    print("====================\n")
# Log the thread-related env vars the Space started with, then cap every
# math/threading library at one thread. setdefault preserves any value the
# platform already set.
print_thread_env("HF SPACE DEFAULT ENV")
os.environ.setdefault("LOKY_MAX_CPU_COUNT", "1")
os.environ.setdefault("OMP_NUM_THREADS", "1")
os.environ.setdefault("MKL_NUM_THREADS", "1")
os.environ.setdefault("OPENBLAS_NUM_THREADS", "1")
os.environ.setdefault("NUMEXPR_NUM_THREADS", "1")
def monitor_cpu_usage():
    """Print this process's CPU usage measured over a one-second window."""
    # interval=1 blocks for a second so the percentage reflects real activity.
    usage = psutil.Process().cpu_percent(interval=1)
    print(f"Current process CPU usage: {usage}%")
# Log which interpreter serves the app (useful when submission venvs exist).
print("sys.executable:", sys.executable)
print("which python3:", shutil.which("python3"))
print("which python:", shutil.which("python"))
# Headless rendering backend: the Space has no display server.
matplotlib.use("Agg")
# Seed from OS entropy (no fixed seed), used for scenario shuffling.
random.seed()
# Write-capable Hub token supplied via the Space's secrets.
HF_TOKEN = os.environ.get("ALL_TOKEN")
OWNER = "IndoorOutdoor"
# Hub dataset repos: aggregated leaderboard rows and raw submission archives.
RESULTS_REPO = f"{OWNER}/docker_new_leaderboard"
SUBMISSIONS_REPO = f"{OWNER}/docker_new_submissions"
# Leaderboard column order; every DataFrame pushed to RESULTS_REPO uses it.
HEADERS = ["Model Name", "Group Name", "Execution Time (s)","Processing Time (s)", "Accuracy", "TP", "FP", "FN", "TN"]
# Arrow schema enforced when pushing leaderboard rows.
LEADERBOARD_FEATURES = Features({
    "Model Name": Value("string"),
    "Group Name": Value("string"),
    "Execution Time (s)": Value("float64"),
    "Processing Time (s)": Value("float64"),
    "Accuracy": Value("float64"),
    "TP": Value("float64"),
    "FP": Value("float64"),
    "FN": Value("float64"),
    "TN": Value("float64"),
})
# Placeholder row used to seed an empty repo; filtered out before display.
SENTINEL_NAME = "__INIT__"
SENTINEL_ROW = {
    "Model Name": SENTINEL_NAME,
    "Group Name": SENTINEL_NAME,
    "Execution Time (s)": 0.0,
    "Processing Time (s)": 0.0,
    "Accuracy": 0.0,
    "TP": 0.0,
    "FP": 0.0,
    "FN": 0.0,
    "TN": 0.0,
}
# Submissions dataset: one row per submission, including zip path + stats
SUBMISSIONS_HEADERS = [
    "Timestamp",  # human-readable
    "Model Name",
    "Group Name",
    "Execution Time (s)",
    "Processing Time (s)",
    "Accuracy",
    "TP",
    "FP",
    "FN",
    "TN",
    "Zip Path",  # path of the zip inside the HF dataset repo
]
SUBMISSIONS_FEATURES = Features({
    "Timestamp": Value("string"),
    "Model Name": Value("string"),
    "Group Name": Value("string"),
    "Execution Time (s)": Value("float64"),
    "Processing Time (s)": Value("float64"),
    "Accuracy": Value("float64"),
    "TP": Value("float64"),
    "FP": Value("float64"),
    "FN": Value("float64"),
    "TN": Value("float64"),
    "Zip Path": Value("string"),
})
# Seed row for an empty submissions repo; epoch timestamp marks it as synthetic.
SENTINEL_SUBMISSIONS_ROW = {
    "Timestamp": "1970-01-01 00:00:00",
    "Model Name": SENTINEL_NAME,
    "Group Name": SENTINEL_NAME,
    "Execution Time (s)": 0.0,
    "Processing Time (s)": 0.0,
    "Accuracy": 0.0,
    "TP": 0.0,
    "FP": 0.0,
    "FN": 0.0,
    "TN": 0.0,
    "Zip Path": "",
}
import shutil, subprocess, os, textwrap
print("CONSTANTS")
def sandbox_probe():
    """Report which sandboxing tools exist and whether unshare/bwrap can spawn python."""
    print("\n===== SANDBOX PROBE =====")
    print("cwd:", os.getcwd())
    for tool in ("unshare", "bwrap", "firejail", "nsjail"):
        print(f"{tool}: {shutil.which(tool)}")

    def _attempt(label, argv):
        # Run one short probe command and report rc/stderr, or the exception.
        try:
            proc = subprocess.run(
                argv,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                timeout=5,
            )
        except Exception as exc:
            print(f"{label} test exception:", repr(exc))
        else:
            print(f"{label} test rc:", proc.returncode)
            print(f"{label} stderr:", (proc.stderr or "").strip())

    _attempt("unshare", ["unshare", "-n", "--fork", "python", "-c", "print('ok')"])
    _attempt(
        "bwrap",
        ["bwrap", "--unshare-net", "--die-with-parent", "--proc", "/proc",
         "--dev", "/dev", sys.executable, "-c", "print('ok')"],
    )
    print("===== END SANDBOX PROBE =====\n")
# sandbox_probe()
def ensure_submissions_repo_exists():
    """Seed the submissions dataset repo on the Hub when it cannot be loaded."""
    try:
        load_dataset(SUBMISSIONS_REPO, split="train", token=HF_TOKEN, download_mode="force_redownload")
    except Exception as e:
        print("[init] submissions repo missing; creating:", e)
    else:
        # Repo already loads; nothing to do.
        return
    seed = Dataset.from_pandas(
        pd.DataFrame([SENTINEL_SUBMISSIONS_ROW]),
        features=SUBMISSIONS_FEATURES,
    )
    DatasetDict({"train": seed}).push_to_hub(SUBMISSIONS_REPO, token=HF_TOKEN)
    print("[init] seeded submissions repo.")
def ensure_results_repo_exists():
    """Seed the leaderboard dataset repo with a sentinel row when it cannot be loaded."""
    try:
        load_dataset(RESULTS_REPO, split="train", token=HF_TOKEN, download_mode="force_redownload")
    except Exception as e:
        print("[init] results repo missing/empty; creating with sentinel row:", e)
    else:
        # Repo already loads; nothing to do.
        return
    seed = Dataset.from_pandas(pd.DataFrame([SENTINEL_ROW]), features=LEADERBOARD_FEATURES)
    DatasetDict({"train": seed}).push_to_hub(RESULTS_REPO, token=HF_TOKEN)
    print("[init] seeded repo with sentinel row.")
# Seed both Hub datasets at startup so later loads/pushes can assume they exist.
ensure_results_repo_exists()
ensure_submissions_repo_exists()
def download_file(url, local_path):
    """Download *url* (with HF bearer auth) to *local_path*; return the path, or None on failure."""
    try:
        response = requests.get(url, headers={"Authorization": f"Bearer {HF_TOKEN}"})
        response.raise_for_status()
        with open(local_path, 'wb') as f:
            f.write(response.content)
    except Exception as e:
        print(f"Error downloading file from {url}: {e}")
        return None
    return local_path
# Module-level holder for the most recently evaluated leaderboard row.
global_new_row = pd.DataFrame()

def set_new_row(new_row):
    """Store *new_row* as the latest evaluated result row."""
    global global_new_row
    global_new_row = new_row

def get_new_row():
    """Return the latest stored result row."""
    return global_new_row
def update_results_dataset(new_row_df: pd.DataFrame):
    """Append *new_row_df* to the Hub leaderboard dataset and return the refreshed dataset.

    Falls back to a sentinel-only base frame when the existing split cannot
    be loaded, then pushes the concatenated result as the "train" split.
    """
    ensure_results_repo_exists()
    # Align columns with the leaderboard schema (extra columns dropped, missing → NaN).
    new_row_df = new_row_df.reindex(columns=HEADERS)
    try:
        ds = load_dataset(RESULTS_REPO, split="train", token=HF_TOKEN, download_mode="force_redownload")
        df = ds.to_pandas()
    except Exception as e:
        print("[update] load failed; using sentinel-only base:", e)
        df = pd.DataFrame([SENTINEL_ROW])
    # Keep schema: backfill any missing column.
    # Fix: assigning `[]` to a column of a non-empty frame raises ValueError;
    # a scalar None broadcasts safely for empty and non-empty frames alike.
    for c in HEADERS:
        if c not in df.columns:
            df[c] = None
    df = df[HEADERS]
    updated_df = pd.concat([df, new_row_df], ignore_index=True)
    # Push explicitly as the train split.
    updated_ds = Dataset.from_pandas(updated_df, features=LEADERBOARD_FEATURES)
    DatasetDict({"train": updated_ds}).push_to_hub(RESULTS_REPO, token=HF_TOKEN)
    # Return a freshly loaded copy so callers see the post-push state.
    return load_dataset(RESULTS_REPO, split="train", token=HF_TOKEN)
# HF_TOKEN = os.environ.get("ALL_TOKEN")
# OWNER = "IndoorOutdoor"
# READ_TOKEN = os.environ.get("read_token")
# REPO_ID = f"{OWNER}/leaderboard"
# QUEUE_REPO = f"{OWNER}/requests"
# RESULTS_REPO = f"{OWNER}/results"
# SUBMISSIONS_REPO = f"{OWNER}/submissions"
# ensure_results_repo_exists()
# ensure_submissions_repo_exists()
# Module-level status string surfaced in the UI status box.
global_error_message = "Ready for submission!"

def set_error_message(message):
    """Record *message* as the current status and echo it for debugging."""
    global global_error_message
    global_error_message = message
    print("ERROR UPDATED:", global_error_message)  # Debugging

def get_error_message():
    """Return the current status message."""
    return global_error_message
# def install_requirements(file_path):
# try:
# with open(file_path, "r") as file:
# requirements = file.readlines()
# for req in requirements:
# package = req.strip()
# if package:
# subprocess.run(["pip", "install", package], check=True)
# print(f"Installed: {package}")
# print("All requirements installed successfully.")
# except FileNotFoundError:
# set_error_message(f"Error: {file_path} not found.")
# print(f"Error: {file_path} not found.")
# except subprocess.CalledProcessError as e:
# set_error_message(f"Installation failed, a process error occured: {e}")
# print(f"Installation failed, subprocess called process error: {e}")
def install_requirements(req_file, submission_dir):
    """Create a private venv inside *submission_dir* and install *req_file* into it.

    Returns (python_bin, venv_dir) for the new environment. Raises
    subprocess.CalledProcessError when any step fails (check=True).
    """
    venv_dir = os.path.join(submission_dir, ".venv")
    python_bin = os.path.join(venv_dir, "bin", "python")
    # Build the venv, then bring its pip up to date.
    for step in ([sys.executable, "-m", "venv", venv_dir],
                 [python_bin, "-m", "pip", "install", "--upgrade", "pip"]):
        subprocess.run(step, check=True)
    print("no pip cache")
    # --no-cache-dir keeps the sandbox disk footprint down.
    subprocess.run(
        [python_bin, "-m", "pip", "install", "--no-cache-dir", "-r", req_file],
        check=True,
    )
    print("installed successfully")
    return python_bin, venv_dir
# HEADERS = ["Model Name", "Group Name", "Execution Time (s)", "Accuracy", "TP", "FP", "FN", "TN"]
# Reference (lat, lon) coordinates for each capture site.
BASE = {'ottawa': (45.30326753851309, -75.93640391349997),
        'ali_home': (37.88560412289598, -122.30218612514359),
        'josh_home': (37.8697406, -122.30218612514359),
        'cory': (37.8697406, -122.281570)}

def get_base(filename):
    """Map a capture *filename* to its site's base coordinates.

    Substring precedence matches the original if/elif chain: "home" wins
    over "ottawa" and "josh"; anything else falls back to "cory".
    """
    for needle, site in (("home", "ali_home"),
                         ("ottawa", "ottawa"),
                         ("josh", "josh_home")):
        if needle in filename:
            return BASE[site]
    return BASE["cory"]
def save_submission_zip_and_metadata(zip_src_path, modelname, groupname, stats):
    """
    Saves the actual ZIP file (as a file artifact in the HF dataset repo)
    and appends a row with timestamp + metrics + zip path to SUBMISSIONS_REPO.

    *stats* must contain all metric keys from SUBMISSIONS_HEADERS (floats).
    Returns True after the push completes.
    """
    ensure_submissions_repo_exists()
    # 1. Human-readable timestamp (local time)
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # 2. Standardize ZIP filename in the repo, e.g. "MyGroup_MyModel.zip"
    zip_dest_filename = f"{groupname}_{modelname}.zip"
    # 3. Upload the actual zip file as a file in the dataset repo
    api = HfApi()
    api.upload_file(
        path_or_fileobj=zip_src_path,
        path_in_repo=zip_dest_filename,
        repo_id=SUBMISSIONS_REPO,
        repo_type="dataset",
        token=HF_TOKEN,
    )
    # 4. Build a row matching SUBMISSIONS_FEATURES / SUBMISSIONS_HEADERS
    row = {
        "Timestamp": timestamp,
        "Model Name": modelname,
        "Group Name": groupname,
        "Execution Time (s)": float(stats["Execution Time (s)"]),
        "Processing Time (s)": float(stats["Processing Time (s)"]),
        "Accuracy": float(stats["Accuracy"]),
        "TP": float(stats["TP"]),
        "FP": float(stats["FP"]),
        "FN": float(stats["FN"]),
        "TN": float(stats["TN"]),
        "Zip Path": zip_dest_filename,
    }
    # 5. Load existing submissions dataset (if any)
    try:
        ds = load_dataset(SUBMISSIONS_REPO, split="train", token=HF_TOKEN)
        df = ds.to_pandas()
    except Exception as e:
        print("[submission] load failed, starting new:", e)
        df = pd.DataFrame([SENTINEL_SUBMISSIONS_ROW])
    # Fix: `df[c] = []` raises ValueError on a non-empty frame; a scalar None
    # broadcasts safely whether or not rows exist.
    for c in SUBMISSIONS_HEADERS:
        if c not in df.columns:
            df[c] = None
    df = df[SUBMISSIONS_HEADERS]
    updated_df = pd.concat([df, pd.DataFrame([row])], ignore_index=True)
    new_ds = Dataset.from_pandas(updated_df, features=SUBMISSIONS_FEATURES)
    DatasetDict({"train": new_ds}).push_to_hub(SUBMISSIONS_REPO, token=HF_TOKEN)
    return True
# print(df.head())
def fetch_lb():
    """Load the leaderboard from the Hub, drop the sentinel, dedupe, rank, and return it.

    Returns a DataFrame with a leading "Rank" column, or an empty frame with
    the HEADERS columns when no real rows exist yet.
    """
    print("fetch_lb")
    # Resource snapshot + CPU probe: diagnostics for the Space's quota usage.
    snap0 = snapshot_resources("[app][submit_start]", interval=1.0, watch_dir="/tmp")
    monitor_cpu_usage()
    try:
        ensure_results_repo_exists()
        leaderboard_dataset = load_dataset(RESULTS_REPO, split="train", token=HF_TOKEN, download_mode="force_redownload")
        df = leaderboard_dataset.to_pandas()
    except Exception as e:
        print(f"Error loading leaderboard:", e)
        df = pd.DataFrame([SENTINEL_ROW])
    # Fix: `df[c] = []` raises ValueError on a non-empty frame; scalar None is safe.
    for c in HEADERS:
        if c not in df.columns:
            df[c] = None
    df = df[HEADERS]
    # Drop the seeding sentinel row.
    df = df[df["Model Name"] != SENTINEL_NAME].copy()
    if len(df) == 0:
        return pd.DataFrame(columns=HEADERS)
    # Best accuracy first, faster execution breaks ties; keep one row per (model, group).
    df = (
        df.sort_values(by=["Accuracy", "Execution Time (s)"], ascending=[False, True])
        .drop_duplicates(subset=["Model Name", "Group Name"], keep="first")
        .reset_index(drop=True)
    )
    df.insert(0, "Rank", df.index + 1)
    return df
# Cached leaderboard frame; refreshed after each successful submission.
leaderboard_data = fetch_lb()
def compute_stats_sector(sectors_model, sector_gt):
    """Score per-sector predictions against ground truth with one-sector slack.

    A ground-truth-positive sector counts as TP when the model fired in that
    sector or either wrap-around neighbor; a model-positive sector adjacent
    to a ground-truth-positive one is credited as TP rather than FP.

    Returns [TP, FP, FN, TN], each normalized by the sector count.
    Generalized from the original hard-coded 8 sectors to len(sector_gt);
    results are identical for 8-sector inputs. The original's always-zero
    `ignored` counter was dead code and has been removed.
    """
    n = len(sector_gt)
    TP = FP = FN = TN = 0
    for i in range(n):
        if sector_gt[i] == 1:
            # Accept a hit in this sector or either neighbor (wrap-around).
            if sectors_model[i] > 0 or sectors_model[(i + 1) % n] > 0 or sectors_model[(i - 1) % n] > 0:
                TP += 1
            else:
                FN += 1
        else:
            if sectors_model[i] > 0:
                # A prediction next to a true sector is near enough: credit TP.
                if sector_gt[(i - 1) % n] > 0 or sector_gt[(i + 1) % n] > 0:
                    TP += 1
                    continue
                FP += 1
            else:
                TN += 1
    return [TP / n, FP / n, FN / n, TN / n]
#Compare the model output with ground truth
#return TP, FP, FN, TN
#This fuction compute stats when the model is binary i.e., outputs only indoor vs outdoor
def compute_stats_in_out(sectors_model, indoor_gt):
    """Score a binary indoor/outdoor prediction as a one-hot [TP, FP, FN, TN] list.

    Any truthy sector value counts as an "outdoor" prediction; *indoor_gt*
    decides which confusion-matrix cell the prediction falls into.
    """
    predicted = any(sectors_model)
    if indoor_gt:
        # Ground truth indoor: firing any sector is a false positive.
        return [0, 1, 0, 0] if predicted else [0, 0, 0, 1]
    # Ground truth outdoor: firing any sector is a true positive.
    return [1, 0, 0, 0] if predicted else [0, 0, 1, 0]
def read_configuration(filename):
    """Parse the metadata CSV into {scenario: {"sectors", "indoor", "file"}}.

    Expected row layout after the one-line header:
        file,scenario,indoor_flag,sector_flag,...
    where a flag is true only for the literal string "TRUE".

    Fix: "indoor" is now a plain bool (the original mixed True with 0, which
    is truthiness-compatible but type-inconsistent).
    """
    with open(filename, 'r') as fh:
        lines = fh.read().split('\n')
    exp = {}
    for line in lines[1:]:  # skip the header row
        if len(line) == 0:
            continue
        tokens = line.split(',')
        src_file = tokens[0]
        scenario = tokens[1]
        indoor = tokens[2] == "TRUE"
        exp[scenario] = {
            'sectors': [1 if x == "TRUE" else 0 for x in tokens[3:]],
            'indoor': indoor,
            "file": src_file,
        }
    return exp
def evaluate_model(username, groupname, file, submission_python, submit_time):
    """Run the submitted model over every hidden scenario and push the scores.

    Parameters
    ----------
    username, groupname : str
        Leaderboard identity for the submission.
    file : str
        Path to the extracted submission's main.py.
    submission_python : str
        Python binary of the submission's private venv.
    submit_time : float
        Epoch seconds of the original click; anchors "Processing Time (s)".

    Returns (leaderboard_rows, success_flag). (None, False) when the sandbox
    produced no usable output for any scenario.
    """
    print("evaluating...")
    global leaderboard_data
    username = username.strip()
    if not username:
        return leaderboard_data.values.tolist(), False
    try:
        meta_local = hf_hub_download(
            repo_id="IndoorOutdoor/metadata",
            filename="metadata.csv",
            repo_type="dataset",
            token=HF_TOKEN
        )
        exp = read_configuration(meta_local)
        start_time = time.time()
        stats_model_sectors = []
        stats_model_in_out = []
        # Shuffle scenario order so submissions cannot rely on a fixed sequence.
        keys = list(exp.keys())
        random.shuffle(keys)
        for key in keys:
            filename = exp[key]['file']
            indoor_gt = exp[key]['indoor']
            sectors_gt = exp[key]["sectors"]
            filename = filename + ".txt"
            print("FILE TO PROCESS:", filename)
            local_file_path = hf_hub_download(repo_id="IndoorOutdoor/metadata",
                                              filename=filename,
                                              repo_type="dataset",
                                              token=HF_TOKEN)
            sectors_model = import_and_run_function(
                script_path=file,
                function_name="evaluate",
                filename=local_file_path,
                submission_python=submission_python
            )
            # Fix: identity check instead of `== None` (idiomatic; same result here).
            if sectors_model is None:
                return None, False
            # Remove the private data file as soon as it has been consumed.
            try:
                os.remove(local_file_path)
            except Exception as e:
                print(f"Warning: Couldn't delete {local_file_path}{e}")
            print("SECTORS MODEL: ", sectors_model)
            stats_model_sectors.append(compute_stats_sector(sectors_model, sectors_gt))
            stats_model_in_out.append(compute_stats_in_out(sectors_model, indoor_gt))
        print("MONITOR CPU USAGE: ")
        monitor_cpu_usage()
        execution_time = round(time.time() - start_time, 4)
        print("calculating summary stats")
        # Mean per-sector confusion rates across all scenarios.
        TP = np.mean([x[0] for x in stats_model_sectors])
        FP = np.mean([x[1] for x in stats_model_sectors])
        FN = np.mean([x[2] for x in stats_model_sectors])
        TN = np.mean([x[3] for x in stats_model_sectors])
        accuracy = round((TP + TN) / (TP + TN + FP + FN), 2)
    except Exception as e:
        # Fix: the failure row previously had 8 values for 9 HEADERS columns,
        # so pd.DataFrame raised inside this handler. Supply all 9 values
        # (Processing Time added as 0.0).
        failure_row = [username, groupname, float("inf"), 0.0, 0, -1, -1, -1, -1]
        leaderboard_data = pd.concat(
            [leaderboard_data, pd.DataFrame([failure_row], columns=HEADERS)],
            ignore_index=True)
        set_error_message(f"An error occured while evaluating: {str(e)}")
        return leaderboard_data.values.tolist(), False
    proc_time = round(time.time() - (submit_time), 4)
    new_entry = pd.DataFrame(
        [[username, groupname, round(execution_time, 2), round(proc_time, 2),
          round(accuracy, 2), round(TP, 2), round(FP, 2), round(FN, 2), round(TN, 2)]],
        columns=HEADERS)
    set_new_row(new_entry)
    leaderboard_data = update_results_dataset(new_entry)
    leaderboard_data = leaderboard_data.to_pandas() if leaderboard_data is not None else None
    if leaderboard_data is not None:
        leaderboard_data = leaderboard_data.sort_values(
            by=["Accuracy", "Execution Time (s)"], ascending=[False, True]).reset_index(drop=True)
    return leaderboard_data.values.tolist(), True
def import_and_run_function(script_path, function_name, filename, submission_python):
    """Execute *function_name* from the submitted script inside a bwrap sandbox.

    The submission runs under its own venv python (*submission_python*) with
    no network, a private PID namespace, a cleared environment, and only the
    submission directory plus a temp dir mounted writable. The private input
    file is copied into the sandbox; the bundled runner.py writes JSON output
    which is then validated to be a list of exactly 8 ints in {0, 1}.

    Returns the 8-element list, or None on any failure (the reason is
    recorded via set_error_message).
    """
    APP_DIR = os.path.dirname(os.path.abspath(__file__))
    RUNNER_PATH = os.path.join(APP_DIR, "runner.py")
    # submission_python is <venv>/bin/python, so two dirname hops give the venv root.
    venv_dir = os.path.dirname(os.path.dirname(submission_python))
    print(f"venv: {venv_dir}")
    if not os.path.exists(RUNNER_PATH):
        # more for dev than users
        set_error_message(f"Error: runner.py not found at {RUNNER_PATH}")
        return None
    if not os.path.exists(script_path):
        set_error_message("Error: Script not found.")
        return None
    if not script_path.endswith(".py"):
        set_error_message("Error: Provided file is not a Python script.")
        return None
    if not os.path.exists(filename):
        set_error_message("Error: Input data file not found.")
        return None
    submission_dir = os.path.dirname(os.path.abspath(script_path))
    try:
        with tempfile.TemporaryDirectory() as tmp:
            tmp_dir = tmp
            sandbox_input = os.path.join(tmp, "input.txt")
            sandbox_output = os.path.join(tmp, "output.json")
            sandbox_runner = os.path.join(tmp, "runner.py")
            shutil.copy(RUNNER_PATH, sandbox_runner)
            # copy private file into temp sandbox area
            shutil.copy(filename, sandbox_input)
            print(f"submission_python: {submission_python}")
            snap1 = snapshot_resources("[app][after_pip]", interval=1.0, watch_dir="/tmp")
            cmd = [
                "bwrap",
                # no network, private PID namespace, die if the app dies
                "--unshare-net",
                "--unshare-pid",
                "--die-with-parent",
                # --- hard isolation of environment ---
                "--clearenv",
                "--setenv", "PYTHONUNBUFFERED", "1",
                "--setenv", "HOME", tmp_dir,
                "--setenv", "XDG_CACHE_HOME", os.path.join(tmp_dir, ".cache"),
                "--setenv", "HF_HOME", os.path.join(tmp_dir, ".hf"),
                # PATH restricted to the submission venv's bin directory.
                "--setenv", "PATH", os.path.join(venv_dir, "bin"),
                # single-threaded math libs inside the sandbox
                "--setenv", "LOKY_MAX_CPU_COUNT", "1",
                "--setenv", "OMP_NUM_THREADS", "1",
                "--setenv", "MKL_NUM_THREADS", "1",
                "--setenv", "OPENBLAS_NUM_THREADS", "1",
                "--setenv", "NUMEXPR_NUM_THREADS", "1",
                # --- minimal system mounts so python works ---
                "--ro-bind", "/usr", "/usr",
                "--ro-bind", "/usr/local", "/usr/local",
                "--ro-bind", "/lib", "/lib",
                "--ro-bind", "/lib64", "/lib64",
                "--bind", submission_python, submission_python,
                "--ro-bind", venv_dir, venv_dir,
                # --- kernel interfaces ---
                "--ro-bind", "/proc", "/proc",
                "--dev", "/dev",
                # --- writable temp ---
                "--tmpfs", "/tmp",
                "--tmpfs", "/etc",
                "--setenv", "TMPDIR", "/tmp",
                # --- ONLY expose what student code needs ---
                "--bind", submission_dir, submission_dir,
                "--bind", tmp_dir, tmp_dir,
                "--chdir", submission_dir,
                # --- IMPORTANT: run submission venv python ---
                submission_python, sandbox_runner,
                "--submission_dir", submission_dir,
                "--script", os.path.abspath(script_path),
                "--function", function_name,
                "--input", sandbox_input,
                "--output", sandbox_output,
            ]
            # NOTE(review): safe_env is built but never passed to subprocess.run —
            # the sandbox relies on --clearenv/--setenv instead. Left as-is.
            safe_env = {
                "PYTHONUNBUFFERED": "1",
                "PATH": os.environ.get("PATH", ""),
            }
            completed = subprocess.run(
                cmd,
                cwd=submission_dir,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                timeout=10000,
            )
            print("===== SANDBOX STDOUT =====", flush=True)
            print(completed.stdout or "", flush=True)
            print("===== SANDBOX STDERR =====", flush=True)
            print(completed.stderr or "", flush=True)
            if completed.returncode != 0:
                err = (completed.stderr or completed.stdout or "").strip()
                err = err[-2000:]  # truncate
                set_error_message(f"Error: Function raised an error during execution. {err}")
                return None
            if not os.path.exists(sandbox_output):
                set_error_message("Error: Submission did not produce an output file.")
                return None
            with open(sandbox_output, "r") as f:
                result = json.load(f)
    except subprocess.TimeoutExpired:
        set_error_message("Error: Submission timed out.")
        return None
    except FileNotFoundError as e:
        set_error_message(f"Error: Missing system dependency: {e}")
        return None
    except Exception as e:
        set_error_message(f"Error: Failed to run submission safely: {str(e)}")
        return None
    # validation of output
    if not isinstance(result, list):
        set_error_message("Error: Function must return a list.")
        return None
    if len(result) != 8:
        set_error_message("Error: Function must return a list of exactly 8 elements.")
        return None
    if not all(isinstance(x, int) and x in [0, 1] for x in result):
        set_error_message(f"Function '{function_name}' must return a list of 8 integers, each 0 or 1.")
        return None
    print(f"Function '{function_name}' executed successfully. Output: {result}")
    return result
def update_leaderboard(username, groupname, zip_file, submit_time):
    """End-to-end submission pipeline: unzip, build venv, evaluate, log, report.

    Returns (status_message, leaderboard_dataframe_or_None). Error paths
    return the error text as the status with a None leaderboard.
    """
    print("Updating Leaderboard...")
    evaluated = True
    if not zip_file:
        set_error_message("No file uploaded.")
        return get_error_message(), None
    zip_path = zip_file.name
    # Each submission gets its own scratch directory (removed in the finally below).
    submission_dir = tempfile.mkdtemp(prefix="submission_")
    try:
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(submission_dir)
    except zipfile.BadZipFile:
        set_error_message("Invalid ZIP file.")
        return "Invalid ZIP file.", None
    except Exception as e:
        set_error_message(f"Error extracting ZIP file: {str(e)}")
        return f"Error extracting ZIP file: {str(e)}", None
    extracted_files = os.listdir(submission_dir)
    print("EXTRACTED FILES:", extracted_files)
    req_file = os.path.join(submission_dir, "user_reqs.txt")
    if "user_reqs.txt" not in extracted_files:
        set_error_message("Missing user_reqs.txt in ZIP file.")
        return "Missing user_reqs.txt in ZIP file.", None
    try:
        print("Installing dependencies...")
        submission_python, venv_dir = install_requirements(req_file, submission_dir)
        print("Dependencies installed successfully.")
    except Exception as e:
        set_error_message(f"Error installing dependencies: {str(e)}")
        return f"Error installing dependencies: {str(e)}", None
    python_script = os.path.join(submission_dir, "main.py")
    if "main.py" not in extracted_files:
        set_error_message("No Python script (main.py) found in ZIP.")
        return "No Python script (main.py) found in ZIP.", None
    try:
        print("Starting evaluation...")
        updated_leaderboard, evaluated = evaluate_model(username, groupname, python_script, submission_python, submit_time)
        if updated_leaderboard == None:
            return "An error occured while evaluating the model", None
        if evaluated == False:
            return "An error occured while evaluating the model", None
    except Exception as e:
        print("Error in eval mode:", str(e))
        set_error_message(f"Error evaluating model: {str(e)}")
        return f"Error evaluating model: {str(e)}", None
    finally:
        # Best-effort scratch cleanup for the evaluation stage.
        # NOTE(review): the earlier extraction/install error returns above skip
        # this finally, so the scratch dir leaks on those paths — confirm intent.
        try:
            shutil.rmtree(submission_dir)
        except Exception as e:
            print(f"[cleanup warning] {e}")
    df = fetch_lb()
    # Debug dump to diagnose name/group mismatches between input and leaderboard.
    print("LOOKUP username repr:", repr(username))
    print("LOOKUP groupname repr:", repr(groupname))
    print("DF unique model/group pairs (repr):")
    for a, b in df[["Model Name", "Group Name"]].drop_duplicates().values.tolist():
        print(" ", repr(a), repr(b))
    row_df = df.loc[(df["Model Name"] == username) & (df["Group Name"] == groupname)]
    if not row_df.empty:
        row = row_df.iloc[0]
        stats = {
            "Execution Time (s)": row["Execution Time (s)"],
            "Processing Time (s)": row["Processing Time (s)"],
            "Accuracy": row["Accuracy"],
            "TP": row["TP"],
            "FP": row["FP"],
            "FN": row["FN"],
            "TN": row["TN"],
        }
        # Save the original uploaded zip plus metadata
        # zip_file.name is the local path of the uploaded file
        save_submission_zip_and_metadata(zip_file.name, username, groupname, stats)
        # For the status box
        your_submission = row_df.values
        set_error_message(
            tabulate(
                list(your_submission),
                headers=["Rank", "Model", "Group", "Time (s)", "Accuracy", "TP", "FP", "FN", "TN"],
            )
        )
    else:
        # Shouldn't really happen if evaluation succeeded, but be safe
        set_error_message("Warning: could not find submission in leaderboard to log stats.")
    return "Submission successful!", df.head(n=10)
def check_if_overwrite(modelname, groupname):
    """Return True when a leaderboard row already exists for (modelname, groupname).

    Returns False when the dataset cannot be loaded or only holds the
    seeding sentinel row.
    """
    try:
        ensure_results_repo_exists()
        ds = load_dataset(RESULTS_REPO, split="train", token=HF_TOKEN, download_mode="force_redownload")
        df = ds.to_pandas()
    except Exception as e:
        print("[overwrite] load failed:", e)
        return False
    # Fix: `df[c] = []` raises ValueError on a non-empty frame; scalar None is safe.
    for c in HEADERS:
        if c not in df.columns:
            df[c] = None
    df = df[df["Model Name"] != SENTINEL_NAME]  # ignore sentinel
    if df.empty:
        return False
    condition = (df['Model Name'] == modelname) & (df['Group Name'] == groupname)
    return condition.any()
def show_button():
    """Reveal the overwrite-confirmation button.

    Fix: Component.update() was removed in Gradio 4; use gr.update, the
    pattern this file already uses for the same button elsewhere.
    """
    return gr.update(visible=True)
with gr.Blocks() as demo:
gr.HTML("""
<style>
.narrow-dropdown .wrap {
max-width: 200px;
margin-left: 0;
}
</style>
""")
gr.Markdown("# 🚀 Indoor vs Outdoor Detection Leaderboard \nUsing the provided dataset, submit a model that can predict if a device is inside or outside. \nSee the README for submission details.")
with gr.Tabs():
# -------- Leaderboard TAB --------
with gr.Tab("Leaderboard"):
gr.Markdown("Using the provided dataset, submit a model that can predict if a device is inside or outside. \nSee the README for submission details.")
with gr.Row():
username_input = gr.Textbox(label="Model Name")
groupname_input = gr.Textbox(label="Group Name")
file_input = gr.File(label="Upload Zip File")
submit_button = gr.Button("Submit File")
status_output = gr.Textbox(label="Status", interactive=False)
overwrite_button = gr.Button("Overwrite", variant='primary', visible=False)
overwrite_state = gr.State(False)
temp_modelname = gr.State("")
temp_groupname = gr.State("")
temp_file = gr.State(None)
client_submit_ms = gr.Number(value=0, visible=False, label="client_submit_ms") # NEW
with gr.Row():
display_option = gr.Dropdown(
choices=["Top 10", "Top 20", "All"],
value="Top 10",
label="Leaderboard View",
elem_classes=["narrow-dropdown"]
)
with gr.Row():
leaderboard_display = gr.Dataframe(
headers=HEADERS,
value=fetch_lb().head(n=10),
label="Leaderboard"
)
# -------- Submission Requirements TAB --------
with gr.Tab("Submission Requirements"):
gr.Markdown("""
## ✅ Submission Requirements
Please follow these rules before uploading your model:
1. **File Format**: Upload a `.zip` file containing your model.
2. **Requirements File**: You must have your requirements file named "user_reqs.txt". Ensure that you include the least number of requirements to minimize runtime.
2. **File Naming**: The zip file must include a Python file name main.py with a function called evaluate with one parameter called "filepath".
2. **Path Building**: Do not hardcode any paths in your code. Programmatically build file paths using methods such as os.path.join, pathlib.Path to avoid hardcoding.
3. **Model Name & Group Name**:
- Model Name must be unique to your group (unless you choose to overwrite).
- Group Name should identify your team/institution.
4. **Evaluation**: Your model will be tested against the hidden validation set.
5. **Leaderboard**: Results will be displayed automatically after processing.
6. **Overwrite Policy**: If the same model name already exists, you can overwrite it by confirming.
7.
📌 See the [README](https://huggingface.co/spaces/IndoorOutdoor/Indoor-Outdoor-Detection-Leaderboard/blob/main/README.md) for detailed instructions.
""")
# with gr.Row():
# username_input = gr.Textbox(label="Model Name")
# groupname_input = gr.Textbox(label = "Group Name")
# file_input = gr.File(label="Upload Zip File")
# submit_button = gr.Button("Submit File")
# status_output = gr.Textbox(label="Status", interactive=False)
# overwrite_button = gr.Button("Overwrite", variant='primary', visible=False)
# overwrite_state = gr.State(False)
# temp_modelname = gr.State("")
# temp_groupname = gr.State("")
# temp_file = gr.State(None)
# with gr.Row():
# display_option = gr.Dropdown(choices=["Top 10", "Top 20", "All"], value="Top 10", label="Leaderboard View", elem_classes=["narrow-dropdown"])
# with gr.Row():
# leaderboard_display = gr.Dataframe(
# headers=HEADERS,
# value=fetch_lb().head(n=10),
# label="Leaderboard"
# )
def handle_submission(modelname, groupname, file, client_submit_ms):
    """Validate and process a new leaderboard submission.

    Args:
        modelname: User-chosen model name (must be unique per group).
        groupname: Team/institution identifier.
        file: Uploaded zip file object from gr.File (may be None).
        client_submit_ms: JS Date.now() at click time (ms since epoch),
            injected by the button's `js` hook; may be falsy if injection
            failed, in which case the server clock is used.

    Returns:
        7-tuple matching the click outputs: (status text, overwrite-button
        visibility update, overwrite-allowed flag, temp model name, temp
        group name, temp file, leaderboard dataframe or None).
    """
    server_start_ms = int(time.time() * 1000)
    # Fall back to the server clock if no client timestamp was delivered.
    click_ms = int(float(client_submit_ms)) if client_submit_ms else server_start_ms
    # Rough queue latency: time between the browser click and this handler
    # actually starting on the server.
    approx_queue_wait_ms = server_start_ms - click_ms
    submit_time = click_ms / 1000.0  # seconds since epoch, for update_leaderboard
    print("[TIME] click_ms:", click_ms, "server_start_ms:", server_start_ms,
          "approx_queue_wait_ms:", approx_queue_wait_ms, flush=True)
    modelname = (modelname or "").strip()
    groupname = (groupname or "").strip()
    print("[DEBUG] handle_submission fired", modelname, groupname, bool(file),
          client_submit_ms, flush=True)

    if check_if_overwrite(modelname, groupname):
        # Duplicate name: keep the pending submission in the temp states and
        # reveal the overwrite button instead of evaluating.
        msg = "Model already exists. Click 'Overwrite Submission' to continue or use a new model name."
        set_error_message(msg)
        return (
            msg,
            gr.update(visible=True),  # show overwrite button
            False,                    # don't allow overwrite yet
            modelname,
            groupname,
            file, None
        )

    status_message, leaderboard_data = update_leaderboard(modelname, groupname, file, submit_time)
    # Bug fix: surface the real status from update_leaderboard instead of a
    # hard-coded "Model submitted successfully.", so evaluation failures are
    # actually visible to the user.
    return (
        status_message,
        gr.update(visible=False),  # keep overwrite hidden
        False,
        "", "", None, leaderboard_data
    )
def handle_overwrite(modelname, groupname, file, client_submit_ms):
    """Re-submit a previously-seen model name, overwriting its entry.

    Args:
        modelname: Model name held in the temp state from the first attempt.
        groupname: Group name held in the temp state.
        file: The originally uploaded zip file object.
        client_submit_ms: JS Date.now() at overwrite-click time (ms since
            epoch); falsy values fall back to the server clock.

    Returns:
        (status text, overwrite-button visibility update, leaderboard df).
    """
    # Prefer the client-side click timestamp; fall back to server time.
    submit_time = (float(client_submit_ms) / 1000.0) if client_submit_ms else time.time()
    status_message, leaderboard = update_leaderboard(modelname, groupname, file, submit_time)
    return status_message, gr.update(visible=False), leaderboard
# Whenever the status box value changes, refresh it from the shared error
# message store (set via set_error_message in the handlers).
# NOTE(review): an output that is also the change-trigger can re-fire itself
# in Gradio — presumably get_error_message is idempotent; confirm.
status_output.change(fn=get_error_message, inputs=[], outputs=status_output)
# submit_button.click(fn=handle_submission,
# inputs=[username_input, groupname_input, file_input],
# outputs=[status_output, overwrite_button, overwrite_state, temp_modelname,temp_groupname,temp_file,leaderboard_display])
# Wire the submit button. The `js` hook rewrites the 4th input on the client
# so the handler receives the browser-side click timestamp (Date.now(), ms).
submit_button.click(
fn=handle_submission,
inputs=[username_input, groupname_input, file_input, client_submit_ms], # <-- add
outputs=[status_output, overwrite_button, overwrite_state,
temp_modelname, temp_groupname, temp_file, leaderboard_display],
js="(m,g,f,ts) => [m, g, f, Date.now()]", # <-- inject click timestamp
)
# overwrite_button.click(
# fn=handle_overwrite,
# inputs=[temp_modelname, temp_groupname, temp_file],
# outputs=[status_output, overwrite_button, leaderboard_display]
# )
# Wire the overwrite button with the same client-timestamp injection; it reads
# the pending submission out of the temp_* state components.
overwrite_button.click(
fn=handle_overwrite,
inputs=[temp_modelname, temp_groupname, temp_file, client_submit_ms], # <-- add
outputs=[status_output, overwrite_button, leaderboard_display],
js="(m,g,f,ts) => [m, g, f, Date.now()]", # <-- inject click timestamp
)
def update_display(view_choice):
    """Return the leaderboard trimmed to the selected view size.

    Args:
        view_choice: One of "Top 10", "Top 20", or "All" (anything else
            falls through to the full leaderboard, same as "All").

    Returns:
        A pandas DataFrame from fetch_lb(), possibly truncated via head().
    """
    board = fetch_lb()
    # Dispatch table instead of an if/elif chain; None means "no limit".
    limits = {"Top 10": 10, "Top 20": 20}
    limit = limits.get(view_choice)
    return board if limit is None else board.head(limit)
# Re-render the leaderboard whenever the user changes the view dropdown.
display_option.change(
fn=update_display,
inputs=display_option,
outputs=leaderboard_display
)
# Populate the leaderboard on initial page load using the dropdown's
# default value ("Top 10").
demo.load(
fn=update_display,
inputs=display_option,
outputs=leaderboard_display
)
# Serialize submissions: only one evaluation runs at a time.
demo.queue(default_concurrency_limit=1) # or demo.queue()
# Bind to all interfaces; honor the platform-provided PORT (default 7860).
demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))
# demo.launch()