vedkdev committed on
Commit
283dfb9
·
verified ·
1 Parent(s): 60d72cc

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. server/ui.py +202 -5
  2. validate-submission.sh +185 -0
server/ui.py CHANGED
@@ -143,7 +143,7 @@ def render_home_page() -> str:
143
  color: var(--muted);
144
  }
145
 
146
- input {
147
  width: 100%;
148
  border: 1px solid rgba(18, 88, 63, 0.22);
149
  border-radius: 10px;
@@ -154,12 +154,82 @@ def render_home_page() -> str:
154
  transition: border-color 180ms ease, box-shadow 180ms ease;
155
  }
156
 
157
- input:focus {
158
  outline: none;
159
  border-color: var(--accent);
160
  box-shadow: 0 0 0 4px rgba(15, 139, 99, 0.14);
161
  }
162
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  .actions {
164
  margin-top: 6px;
165
  display: flex;
@@ -318,7 +388,7 @@ def render_home_page() -> str:
318
  <section class="hero">
319
  <span class="eyebrow"><span class="dot"></span>FlakySleuth Space</span>
320
  <h1>Run Inference From The Browser</h1>
321
- <p>Launch <code>inference.py</code>, monitor stdout live, and inspect episode summaries without leaving this Space. The API endpoints for OpenEnv stay available at the same time.</p>
322
  </section>
323
 
324
  <section class="panel-grid">
@@ -341,8 +411,18 @@ def render_home_page() -> str:
341
  </div>
342
 
343
  <div class="field span-2">
344
- <label for="task_types">Task Types (comma-separated)</label>
345
- <input id="task_types" name="task_types" value="classify,root_cause,fix_proposal" />
 
 
 
 
 
 
 
 
 
 
346
  </div>
347
 
348
  <div class="field span-2">
@@ -410,6 +490,106 @@ def render_home_page() -> str:
410
  const logEl = document.getElementById("log-output");
411
  const logCountEl = document.getElementById("log-count");
412
  const summaryEl = document.getElementById("summary-line");
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
413
 
414
  function readFormPayload() {
415
  return {
@@ -480,6 +660,10 @@ def render_home_page() -> str:
480
  async function startRun() {
481
  runButton.disabled = true;
482
  try {
 
 
 
 
483
  const response = await fetch("/web/inference/start", {
484
  method: "POST",
485
  headers: { "Content-Type": "application/json" },
@@ -497,6 +681,10 @@ def render_home_page() -> str:
497
  alert("Could not start inference. Check logs and try again.");
498
  } finally {
499
  form.api_key.value = "";
 
 
 
 
500
  }
501
  }
502
 
@@ -512,6 +700,15 @@ def render_home_page() -> str:
512
 
513
  runButton.addEventListener("click", startRun);
514
  stopButton.addEventListener("click", stopRun);
 
 
 
 
 
 
 
 
 
515
 
516
  fetchStatus();
517
  window.setInterval(fetchStatus, 2200);
 
143
  color: var(--muted);
144
  }
145
 
146
+ input, select {
147
  width: 100%;
148
  border: 1px solid rgba(18, 88, 63, 0.22);
149
  border-radius: 10px;
 
154
  transition: border-color 180ms ease, box-shadow 180ms ease;
155
  }
156
 
157
+ input:focus, select:focus {
158
  outline: none;
159
  border-color: var(--accent);
160
  box-shadow: 0 0 0 4px rgba(15, 139, 99, 0.14);
161
  }
162
 
163
+ .task-picker {
164
+ display: grid;
165
+ gap: 8px;
166
+ padding: 10px;
167
+ border-radius: 12px;
168
+ border: 1px solid rgba(15, 139, 99, 0.22);
169
+ background: rgba(255, 255, 255, 0.85);
170
+ }
171
+
172
+ .task-picker-row {
173
+ display: grid;
174
+ grid-template-columns: 1fr auto;
175
+ gap: 8px;
176
+ }
177
+
178
+ .btn-add-task {
179
+ background: #1c6f50;
180
+ color: #fff;
181
+ padding: 0 14px;
182
+ }
183
+
184
+ .task-chips {
185
+ display: flex;
186
+ gap: 8px;
187
+ flex-wrap: wrap;
188
+ min-height: 32px;
189
+ align-items: center;
190
+ }
191
+
192
+ .task-chip {
193
+ display: inline-flex;
194
+ align-items: center;
195
+ gap: 8px;
196
+ border: 1px solid rgba(20, 67, 50, 0.32);
197
+ border-radius: 999px;
198
+ background: #fff8dc;
199
+ color: #2a362f;
200
+ padding: 6px 10px;
201
+ font: 600 12px/1 var(--display);
202
+ }
203
+
204
+ .task-chip button {
205
+ border: 0;
206
+ background: transparent;
207
+ color: #4a2a12;
208
+ padding: 0;
209
+ border-radius: 0;
210
+ min-width: unset;
211
+ font: 700 12px/1 var(--display);
212
+ line-height: 1;
213
+ cursor: pointer;
214
+ }
215
+
216
+ .task-chip button:hover {
217
+ transform: none;
218
+ filter: brightness(0.92);
219
+ }
220
+
221
+ .task-empty {
222
+ font: 500 12px/1.2 var(--mono);
223
+ color: #52695f;
224
+ }
225
+
226
+ .field-note {
227
+ margin: 0;
228
+ color: #4e655b;
229
+ font-size: 12px;
230
+ line-height: 1.4;
231
+ }
232
+
233
  .actions {
234
  margin-top: 6px;
235
  display: flex;
 
388
  <section class="hero">
389
  <span class="eyebrow"><span class="dot"></span>FlakySleuth Space</span>
390
  <h1>Run Inference From The Browser</h1>
391
+ <p>This evaluator console runs benchmark episodes for <strong>classification</strong>, <strong>root-cause identification</strong>, and <strong>fix proposal</strong>. Use it to review logs, score trends, and reproducible run settings while judging submission quality.</p>
392
  </section>
393
 
394
  <section class="panel-grid">
 
411
  </div>
412
 
413
  <div class="field span-2">
414
+ <label for="task-type-select">Task Types</label>
415
+ <div class="task-picker">
416
+ <div id="task-chips" class="task-chips"></div>
417
+ <div class="task-picker-row">
418
+ <select id="task-type-select">
419
+ <option value="">Choose task type</option>
420
+ </select>
421
+ <button id="btn-add-task" class="btn-add-task" type="button">Add</button>
422
+ </div>
423
+ <input id="task_types" name="task_types" type="hidden" value="classify,root_cause,fix_proposal" />
424
+ </div>
425
+ <p class="field-note">Add from dropdown, remove with <code>x</code> on each chip.</p>
426
  </div>
427
 
428
  <div class="field span-2">
 
490
  const logEl = document.getElementById("log-output");
491
  const logCountEl = document.getElementById("log-count");
492
  const summaryEl = document.getElementById("summary-line");
493
+ const taskInput = document.getElementById("task_types");
494
+ const taskChipsEl = document.getElementById("task-chips");
495
+ const taskSelectEl = document.getElementById("task-type-select");
496
+ const taskAddButton = document.getElementById("btn-add-task");
497
+
498
+ const TASK_TYPE_ORDER = ["classify", "root_cause", "fix_proposal"];
499
+ const TASK_TYPE_LABELS = {
500
+ classify: "Classify",
501
+ root_cause: "Root Cause",
502
+ fix_proposal: "Fix Proposal",
503
+ };
504
+
505
+ function parseTaskTypes(raw) {
506
+ const tokens = String(raw || "")
507
+ .split(",")
508
+ .map((token) => token.trim())
509
+ .filter(Boolean);
510
+ const unique = [];
511
+ for (const token of tokens) {
512
+ if (TASK_TYPE_ORDER.includes(token) && !unique.includes(token)) {
513
+ unique.push(token);
514
+ }
515
+ }
516
+ return unique;
517
+ }
518
+
519
+ let selectedTaskTypes = parseTaskTypes(taskInput.value);
520
+ if (!selectedTaskTypes.length) {
521
+ selectedTaskTypes = [...TASK_TYPE_ORDER];
522
+ }
523
+
524
+ function renderTaskSelect() {
525
+ taskSelectEl.innerHTML = "";
526
+
527
+ const placeholder = document.createElement("option");
528
+ placeholder.value = "";
529
+ placeholder.textContent = "Choose task type";
530
+ placeholder.selected = true;
531
+ taskSelectEl.appendChild(placeholder);
532
+
533
+ for (const taskType of TASK_TYPE_ORDER) {
534
+ if (selectedTaskTypes.includes(taskType)) continue;
535
+ const option = document.createElement("option");
536
+ option.value = taskType;
537
+ option.textContent = TASK_TYPE_LABELS[taskType];
538
+ taskSelectEl.appendChild(option);
539
+ }
540
+
541
+ const hasChoices = taskSelectEl.options.length > 1;
542
+ taskSelectEl.disabled = !hasChoices;
543
+ taskAddButton.disabled = !hasChoices;
544
+ }
545
+
546
+ function renderTaskChips() {
547
+ taskChipsEl.innerHTML = "";
548
+
549
+ if (!selectedTaskTypes.length) {
550
+ const hint = document.createElement("span");
551
+ hint.className = "task-empty";
552
+ hint.textContent = "No task selected yet.";
553
+ taskChipsEl.appendChild(hint);
554
+ return;
555
+ }
556
+
557
+ for (const taskType of selectedTaskTypes) {
558
+ const chip = document.createElement("span");
559
+ chip.className = "task-chip";
560
+
561
+ const chipText = document.createElement("span");
562
+ chipText.textContent = TASK_TYPE_LABELS[taskType] || taskType;
563
+
564
+ const chipRemove = document.createElement("button");
565
+ chipRemove.type = "button";
566
+ chipRemove.textContent = "x";
567
+ chipRemove.setAttribute("aria-label", `Remove ${chipText.textContent}`);
568
+ chipRemove.addEventListener("click", () => {
569
+ selectedTaskTypes = selectedTaskTypes.filter((value) => value !== taskType);
570
+ syncTaskTypes();
571
+ });
572
+
573
+ chip.appendChild(chipText);
574
+ chip.appendChild(chipRemove);
575
+ taskChipsEl.appendChild(chip);
576
+ }
577
+ }
578
+
579
+ function syncTaskTypes() {
580
+ taskInput.value = selectedTaskTypes.join(",");
581
+ renderTaskChips();
582
+ renderTaskSelect();
583
+ }
584
+
585
+ function addSelectedTaskType() {
586
+ const selected = taskSelectEl.value.trim();
587
+ if (!selected) return;
588
+ if (!selectedTaskTypes.includes(selected)) {
589
+ selectedTaskTypes.push(selected);
590
+ }
591
+ syncTaskTypes();
592
+ }
593
 
594
  function readFormPayload() {
595
  return {
 
660
  async function startRun() {
661
  runButton.disabled = true;
662
  try {
663
+ if (!selectedTaskTypes.length) {
664
+ alert("Please add at least one task type.");
665
+ return;
666
+ }
667
  const response = await fetch("/web/inference/start", {
668
  method: "POST",
669
  headers: { "Content-Type": "application/json" },
 
681
  alert("Could not start inference. Check logs and try again.");
682
  } finally {
683
  form.api_key.value = "";
684
+ const isActive = ["running", "starting"].includes((statusText.textContent || "").toLowerCase());
685
+ if (!isActive) {
686
+ runButton.disabled = false;
687
+ }
688
  }
689
  }
690
 
 
700
 
701
  runButton.addEventListener("click", startRun);
702
  stopButton.addEventListener("click", stopRun);
703
+ taskAddButton.addEventListener("click", addSelectedTaskType);
704
+ taskSelectEl.addEventListener("keydown", (event) => {
705
+ if (event.key === "Enter") {
706
+ event.preventDefault();
707
+ addSelectedTaskType();
708
+ }
709
+ });
710
+
711
+ syncTaskTypes();
712
 
713
  fetchStatus();
714
  window.setInterval(fetchStatus, 2200);
validate-submission.sh ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ #
3
+ # validate-submission.sh — OpenEnv Submission Validator
4
+ #
5
+ # Checks that your HF Space is live, Docker image builds, and openenv validate passes.
6
+ #
7
+ # Prerequisites:
8
+ # - Docker: https://docs.docker.com/get-docker/
9
+ # - openenv-core: pip install openenv-core
10
+ # - curl (usually pre-installed)
11
+ #
12
+ # Run:
13
+ # curl -fsSL https://raw.githubusercontent.com/<owner>/<repo>/main/scripts/validate-submission.sh | bash -s -- <ping_url> [repo_dir]
14
+ #
15
+ # Or download and run locally:
16
+ # chmod +x validate-submission.sh
17
+ # ./validate-submission.sh <ping_url> [repo_dir]
18
+ #
19
+ # Arguments:
20
+ # ping_url Your HuggingFace Space URL (e.g. https://your-space.hf.space)
21
+ # repo_dir Path to your repo (default: current directory)
22
+ #
23
+ # Examples:
24
+ # ./validate-submission.sh https://my-team.hf.space
25
+ # ./validate-submission.sh https://my-team.hf.space ./my-repo
26
+ #
27
+
28
# Unset variables are errors and a failing stage fails its whole pipeline.
# `-e` is not enabled; the steps below check exit codes explicitly.
set -u
set -o pipefail

# Time limit, in seconds, applied to the docker build step.
DOCKER_BUILD_TIMEOUT=600

# Colour escapes start out empty and are only filled in when stdout is a
# terminal, so redirected output stays free of escape sequences.
RED=''
GREEN=''
YELLOW=''
BOLD=''
NC=''
if [ -t 1 ]; then
  RED='\033[0;31m'
  GREEN='\033[0;32m'
  YELLOW='\033[1;33m'
  BOLD='\033[1m'
  NC='\033[0m'
fi
40
+
41
# run_with_timeout SECS CMD [ARGS...]
#
# Runs CMD with a time limit of SECS seconds and returns CMD's exit status.
# Delegates to `timeout` or `gtimeout` when either is on PATH; otherwise
# falls back to a background watcher that signals CMD once SECS have elapsed.
run_with_timeout() {
  local secs="$1"; shift
  if command -v timeout &>/dev/null; then
    timeout "$secs" "$@"
  elif command -v gtimeout &>/dev/null; then
    gtimeout "$secs" "$@"
  else
    "$@" &
    local pid=$!
    # The watcher must not inherit stdout/stderr: callers capture this
    # function with $(...), and a lingering `sleep` holding the pipe open
    # would keep the capture blocked until the full timeout had elapsed.
    # The TERM trap also reaps the sleep so nothing is left running once
    # the command finishes early.
    (
      sleeper=""
      trap 'kill "$sleeper" 2>/dev/null; exit 0' TERM
      sleep "$secs" &
      sleeper=$!
      wait "$sleeper" && kill "$pid" 2>/dev/null
    ) >/dev/null 2>&1 &
    local watcher=$!
    wait "$pid" 2>/dev/null
    local rc=$?
    # The command is done (or was killed); cancel the watcher and reap it.
    kill "$watcher" 2>/dev/null
    wait "$watcher" 2>/dev/null
    return $rc
  fi
}
59
+
60
# portable_mktemp [PREFIX]
#
# Prints the path of a freshly created temp file. The file is named
# PREFIX-XXXXXX (PREFIX defaults to "validate") under $TMPDIR, or /tmp when
# TMPDIR is unset. If the templated call fails, plain `mktemp` is used.
portable_mktemp() {
  local prefix="${1:-validate}"
  local tmp_root="${TMPDIR:-/tmp}"
  if ! mktemp "${tmp_root}/${prefix}-XXXXXX" 2>/dev/null; then
    mktemp
  fi
}
64
+
65
# Paths registered here are deleted when the script exits.
CLEANUP_FILES=()

# cleanup: remove every registered temp file; a no-op when none were added.
cleanup() {
  if [ "${#CLEANUP_FILES[@]}" -gt 0 ]; then
    rm -f "${CLEANUP_FILES[@]}"
  fi
}

trap cleanup EXIT
68
+
69
# Positional arguments: the Space URL (required) and the repo path
# (optional, defaults to the current directory).
PING_URL="${1:-}"
REPO_DIR="${2:-.}"

# Without a URL there is nothing to validate: show usage and bail out.
if [ -z "$PING_URL" ]; then
  printf "Usage: %s <ping_url> [repo_dir]\n" "$0"
  printf "\n"
  printf " ping_url Your HuggingFace Space URL (e.g. https://your-space.hf.space)\n"
  printf " repo_dir Path to your repo (default: current directory)\n"
  exit 1
fi

# Resolve the repo path to an absolute directory; this doubles as the
# existence check.
if resolved_dir="$(cd "$REPO_DIR" 2>/dev/null && pwd)"; then
  REPO_DIR="$resolved_dir"
else
  printf "Error: directory '%s' not found\n" "${2:-.}"
  exit 1
fi

# Drop one trailing slash so "$PING_URL/reset" never contains "//".
export PING_URL="${PING_URL%/}"

# Number of checks that have passed so far.
PASS=0
87
+
88
# log MESSAGE...: print MESSAGE prefixed with a UTC HH:MM:SS timestamp.
# Backslash escapes in MESSAGE are expanded (%b), which is how the colour
# variables take effect.
log() {
  local stamp
  stamp="$(date -u +%H:%M:%S)"
  printf "[%s] %b\n" "$stamp" "$*"
}

# pass DESCRIPTION: report a successful check and bump the PASS counter.
pass() {
  log "${GREEN}PASSED${NC} -- $1"
  PASS=$((PASS + 1))
}

# fail DESCRIPTION: report a failed check (does not exit).
fail() {
  log "${RED}FAILED${NC} -- $1"
}

# hint TEXT: print an indented follow-up suggestion.
hint() {
  printf " %bHint:%b %b\n" "$YELLOW" "$NC" "$1"
}

# stop_at STEP: print the abort banner naming STEP and exit with status 1.
stop_at() {
  printf "\n"
  printf "%b%bValidation stopped at %s.%b Fix the above before continuing.\n" \
    "$RED" "$BOLD" "$1" "$NC"
  exit 1
}
97
+
98
# Opening banner followed by the resolved inputs for this run.
printf "\n"
printf "%b========================================%b\n" "$BOLD" "$NC"
printf "%b OpenEnv Submission Validator%b\n" "$BOLD" "$NC"
printf "%b========================================%b\n" "$BOLD" "$NC"
log "Repo: $REPO_DIR"
log "Ping URL: $PING_URL"
printf "\n"
105
+
106
# Step 1: POST an empty JSON object to <ping_url>/reset and require HTTP 200.
log "${BOLD}Step 1/3: Pinging HF Space${NC} ($PING_URL/reset) ..."

# The response body goes to a temp file registered for removal on exit;
# only the status code is captured.
CURL_OUTPUT=$(portable_mktemp "validate-curl")
CLEANUP_FILES+=("$CURL_OUTPUT")

# curl's -w already prints "000" when no HTTP response was received, so the
# fallback sits outside the command substitution. Appending a second "000"
# inside it produced "000000" and skipped the "not reachable" branch.
# stderr is discarded instead of being written over the body file.
HTTP_CODE=$(curl -s -o "$CURL_OUTPUT" -w "%{http_code}" -X POST \
  -H "Content-Type: application/json" -d '{}' \
  --max-time 30 "$PING_URL/reset" 2>/dev/null) || HTTP_CODE="${HTTP_CODE:-000}"

if [ "$HTTP_CODE" = "200" ]; then
  pass "HF Space is live and responds to /reset"
elif [ "$HTTP_CODE" = "000" ]; then
  fail "HF Space not reachable (connection failed or timed out)"
  hint "Check your network connection and that the Space is running."
  # hint passes its text as a %b argument, not a format string, so a single
  # % is needed for the suggested command to be copy-pasteable.
  hint "Try: curl -s -o /dev/null -w '%{http_code}' -X POST $PING_URL/reset"
  stop_at "Step 1"
else
  fail "HF Space /reset returned HTTP $HTTP_CODE (expected 200)"
  hint "Make sure your Space is running and the URL is correct."
  hint "Try opening $PING_URL in your browser first."
  stop_at "Step 1"
fi
127
+
128
# Step 2: build the Docker image from the repo root or, failing that, from
# server/, under a time limit.
log "${BOLD}Step 2/3: Running docker build${NC} ..."

command -v docker &>/dev/null || {
  fail "docker command not found"
  hint "Install Docker: https://docs.docker.com/get-docker/"
  stop_at "Step 2"
}

# The first directory that contains a Dockerfile becomes the build context.
DOCKER_CONTEXT=""
for candidate in "$REPO_DIR" "$REPO_DIR/server"; do
  if [ -f "$candidate/Dockerfile" ]; then
    DOCKER_CONTEXT="$candidate"
    break
  fi
done

if [ -z "$DOCKER_CONTEXT" ]; then
  fail "No Dockerfile found in repo root or server/ directory"
  stop_at "Step 2"
fi

log " Found Dockerfile in $DOCKER_CONTEXT"

# Build output is captured and only shown (last 20 lines) when it fails.
if BUILD_OUTPUT=$(run_with_timeout "$DOCKER_BUILD_TIMEOUT" docker build "$DOCKER_CONTEXT" 2>&1); then
  BUILD_OK=true
  pass "Docker build succeeded"
else
  BUILD_OK=false
  fail "Docker build failed (timeout=${DOCKER_BUILD_TIMEOUT}s)"
  printf "%s\n" "$BUILD_OUTPUT" | tail -20
  stop_at "Step 2"
fi
157
+
158
# Step 3: run `openenv validate` from inside the repo directory.
log "${BOLD}Step 3/3: Running openenv validate${NC} ..."

command -v openenv &>/dev/null || {
  fail "openenv command not found"
  hint "Install it: pip install openenv-core"
  stop_at "Step 3"
}

# The cd happens inside the command substitution, so the script's own
# working directory is left untouched.
if VALIDATE_OUTPUT=$(cd "$REPO_DIR" && openenv validate 2>&1); then
  VALIDATE_OK=true
  pass "openenv validate passed"
  if [ -n "$VALIDATE_OUTPUT" ]; then
    log " $VALIDATE_OUTPUT"
  fi
else
  VALIDATE_OK=false
  fail "openenv validate failed"
  printf "%s\n" "$VALIDATE_OUTPUT"
  stop_at "Step 3"
fi
177
+
178
# Reached only when no step called stop_at: print the success banner.
printf "\n"
printf "%b========================================%b\n" "$BOLD" "$NC"
printf "%b%b All 3/3 checks passed!%b\n" "$GREEN" "$BOLD" "$NC"
printf "%b%b Your submission is ready to submit.%b\n" "$GREEN" "$BOLD" "$NC"
printf "%b========================================%b\n" "$BOLD" "$NC"
printf "\n"

exit 0