Publish UMSR autonomous trainer Space
Browse files
app.py
CHANGED
|
@@ -895,10 +895,59 @@ CUSTOM_CSS = """
|
|
| 895 |
padding: 6px;
|
| 896 |
}
|
| 897 |
|
| 898 |
-
.runtime-
|
| 899 |
-
|
| 900 |
-
|
| 901 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 902 |
}
|
| 903 |
|
| 904 |
.live-console {
|
|
@@ -1724,7 +1773,7 @@ SUBTITLE_SCRAMBLE_JS = r"""
|
|
| 1724 |
};
|
| 1725 |
|
| 1726 |
const followTerminal = () => {
|
| 1727 |
-
const body = document.querySelector(".
|
| 1728 |
if (!body) {
|
| 1729 |
window.setTimeout(followTerminal, 150);
|
| 1730 |
return;
|
|
@@ -3132,60 +3181,13 @@ def render_runtime_stream_html(snapshot: dict[str, Any]) -> str:
|
|
| 3132 |
display_text = line
|
| 3133 |
if tone_class == "log-command" and line.startswith("[command]"):
|
| 3134 |
display_text = line.replace("[command]", "", 1).strip()
|
| 3135 |
-
text =
|
| 3136 |
-
prefix_symbol,
|
| 3137 |
line_no = start_index + idx
|
| 3138 |
-
rows.append(
|
| 3139 |
-
"<div class='live-line "
|
| 3140 |
-
+ tone_class
|
| 3141 |
-
+ "'><span class='live-line-no'>"
|
| 3142 |
-
+ html.escape(str(line_no))
|
| 3143 |
-
+ "</span><span class='live-line-text'><span class='live-line-prefix "
|
| 3144 |
-
+ prefix_class
|
| 3145 |
-
+ "'>"
|
| 3146 |
-
+ html.escape(prefix_symbol)
|
| 3147 |
-
+ "</span>"
|
| 3148 |
-
+ text
|
| 3149 |
-
+ "</span></div>"
|
| 3150 |
-
)
|
| 3151 |
|
| 3152 |
if not rows:
|
| 3153 |
-
rows.append(
|
| 3154 |
-
"<div class='live-line log-empty'>"
|
| 3155 |
-
"<span class='live-line-no'>-</span>"
|
| 3156 |
-
"<span class='live-line-text'><span class='live-line-prefix prefix-neutral'>-</span>No log lines available.</span>"
|
| 3157 |
-
"</div>"
|
| 3158 |
-
)
|
| 3159 |
-
|
| 3160 |
-
stream_parts: list[str] = [
|
| 3161 |
-
stream_chip("pid", str(snapshot.get("training_pid") or "n/a"), "muted"),
|
| 3162 |
-
stream_chip("exit", "n/a" if exit_code is None else str(exit_code), "muted"),
|
| 3163 |
-
stream_chip("tail", str(line_count), "muted"),
|
| 3164 |
-
stream_chip("total", str(total_log_lines), "muted"),
|
| 3165 |
-
stream_chip("log age", log_age_text, log_age_tone),
|
| 3166 |
-
stream_chip("errors", str(error_count), "fail" if error_count > 0 else "muted"),
|
| 3167 |
-
stream_chip("warnings", str(warn_count), "warn" if warn_count > 0 else "muted"),
|
| 3168 |
-
stream_chip("failure streak", str(failure_streak), "warn" if failure_streak > 0 else "muted"),
|
| 3169 |
-
stream_chip("recording", recording_label, recording_tone),
|
| 3170 |
-
stream_chip("artifacts", recording_coverage, recording_tone),
|
| 3171 |
-
stream_chip("write age", recording_latest, recording_tone),
|
| 3172 |
-
]
|
| 3173 |
-
|
| 3174 |
-
if progress:
|
| 3175 |
-
step = int(progress.get("global_step", 0) or 0)
|
| 3176 |
-
max_steps = int(progress.get("max_steps", 0) or 0)
|
| 3177 |
-
step_text = f"{step}/{max_steps}" if max_steps > 0 else str(step)
|
| 3178 |
-
stream_parts.append(stream_chip("step", step_text, "live" if training_running else "muted"))
|
| 3179 |
-
stream_parts.append(stream_chip("epoch", format_number(progress.get("epoch"), digits=3), "muted"))
|
| 3180 |
-
stream_parts.append(stream_chip("loss", format_number(progress_metrics.get("loss")), "ok" if training_running else "muted"))
|
| 3181 |
-
stream_parts.append(stream_chip("lr", format_number(progress_metrics.get("learning_rate"), digits=4), "muted"))
|
| 3182 |
-
if bool(progress.get("distill_enabled")):
|
| 3183 |
-
stream_parts.append(stream_chip("kd", format_number(progress_metrics.get("distill_kd_loss")), "live"))
|
| 3184 |
-
stream_parts.append(stream_chip("temp", format_number(progress_metrics.get("distill_temperature"), digits=3), "warn"))
|
| 3185 |
-
elif training_running:
|
| 3186 |
-
stream_parts.append(stream_chip("telemetry", "worker warming up", "warn"))
|
| 3187 |
-
elif log_age_seconds is not None and log_age_seconds > 30:
|
| 3188 |
-
stream_parts.append(stream_chip("stream", "no recent writes", "warn"))
|
| 3189 |
|
| 3190 |
if training_running:
|
| 3191 |
artifact_state = "live"
|
|
@@ -3213,60 +3215,37 @@ def render_runtime_stream_html(snapshot: dict[str, Any]) -> str:
|
|
| 3213 |
elif artifact_load == 0 and line_count > 0:
|
| 3214 |
artifact_load = 8
|
| 3215 |
|
| 3216 |
-
|
| 3217 |
-
"
|
| 3218 |
-
"
|
| 3219 |
-
"
|
| 3220 |
-
|
| 3221 |
-
|
| 3222 |
-
|
| 3223 |
-
|
| 3224 |
-
|
| 3225 |
-
|
| 3226 |
-
|
| 3227 |
-
|
| 3228 |
-
|
| 3229 |
-
|
| 3230 |
-
|
| 3231 |
-
|
| 3232 |
-
|
| 3233 |
-
|
| 3234 |
-
|
| 3235 |
-
|
| 3236 |
-
|
| 3237 |
-
|
| 3238 |
-
|
| 3239 |
-
|
| 3240 |
-
|
| 3241 |
-
|
| 3242 |
-
|
| 3243 |
-
|
| 3244 |
-
|
| 3245 |
-
|
| 3246 |
-
|
| 3247 |
-
+ "</div>"
|
| 3248 |
-
+ "<div class='live-artifact-track' style='--artifact-load:"
|
| 3249 |
-
+ html.escape(str(artifact_load))
|
| 3250 |
-
+ "%;'>"
|
| 3251 |
-
+ "<span class='live-artifact-fill'></span>"
|
| 3252 |
-
+ "<span class='live-artifact-flow'></span>"
|
| 3253 |
-
+ "</div>"
|
| 3254 |
-
+ "<div class='live-artifact-meta'>"
|
| 3255 |
-
+ "<span>load "
|
| 3256 |
-
+ html.escape(str(artifact_load))
|
| 3257 |
-
+ "%</span>"
|
| 3258 |
-
+ "<span>log age: "
|
| 3259 |
-
+ html.escape(log_age_text)
|
| 3260 |
-
+ " | write age: "
|
| 3261 |
-
+ html.escape(recording_latest)
|
| 3262 |
-
+ "</span>"
|
| 3263 |
-
+ "</div>"
|
| 3264 |
-
+ "</div>"
|
| 3265 |
-
+ "<div class='live-console-body'>"
|
| 3266 |
-
+ "".join(rows)
|
| 3267 |
-
+ "</div>"
|
| 3268 |
-
+ "</section>"
|
| 3269 |
-
)
|
| 3270 |
|
| 3271 |
|
| 3272 |
def latest_log_text(snapshot: dict[str, Any] | None = None) -> str:
|
|
@@ -3368,24 +3347,9 @@ def refresh_live_view(force_overview: bool = False) -> tuple[str, str, str, str,
|
|
| 3368 |
)
|
| 3369 |
except Exception as exc:
|
| 3370 |
fallback_log = (
|
| 3371 |
-
"
|
| 3372 |
-
"
|
| 3373 |
-
"
|
| 3374 |
-
"<span class='live-status-dot dot-fail'></span>"
|
| 3375 |
-
"<div class='live-head-copy'>"
|
| 3376 |
-
"<span class='live-console-title'>Runtime Console</span>"
|
| 3377 |
-
"<span class='live-console-subtitle'>Real-time native trainer output stream</span>"
|
| 3378 |
-
"</div>"
|
| 3379 |
-
"</div>"
|
| 3380 |
-
"<div class='live-head-right'>"
|
| 3381 |
-
"<span class='live-state-badge state-fail'>ERROR</span>"
|
| 3382 |
-
"</div>"
|
| 3383 |
-
"</header>"
|
| 3384 |
-
"<div class='live-console-body'>"
|
| 3385 |
-
"<div class='live-line log-error'><span class='live-line-no'>!</span>"
|
| 3386 |
-
"<span class='live-line-text'><span class='live-line-prefix prefix-error'>!</span>"
|
| 3387 |
-
f"UI refresh error: {html.escape(str(exc))}</span></div>"
|
| 3388 |
-
"</div></section>"
|
| 3389 |
)
|
| 3390 |
return (
|
| 3391 |
LIVE_REFRESH_CACHE.get("ops_strip", ""),
|
|
@@ -4032,7 +3996,14 @@ with gr.Blocks(title="UMSR Autonomous Trainer", **blocks_kwargs) as demo:
|
|
| 4032 |
"<div class='intel-panel-head'><span>Runtime Console</span>"
|
| 4033 |
"<span class='intel-panel-tag'>Live Feed</span></div>"
|
| 4034 |
)
|
| 4035 |
-
log_box = gr.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4036 |
with gr.Column(scale=1, elem_classes=["side-column", "side-stack"]):
|
| 4037 |
with gr.Accordion("Runtime Details", open=True, elem_id="runtime-details-panel", elem_classes=["side-accordion"]):
|
| 4038 |
detail_box = gr.Markdown(elem_id="runtime-details-scroll", elem_classes=["intel-scroll", "side-scroll"])
|
|
|
|
| 895 |
padding: 6px;
|
| 896 |
}
|
| 897 |
|
| 898 |
+
.runtime-terminal,
|
| 899 |
+
#runtime-terminal {
|
| 900 |
+
min-height: 0 !important;
|
| 901 |
+
height: 100% !important;
|
| 902 |
+
flex: 1 1 auto !important;
|
| 903 |
+
padding: 6px !important;
|
| 904 |
+
}
|
| 905 |
+
|
| 906 |
+
#runtime-terminal textarea,
|
| 907 |
+
.runtime-terminal textarea {
|
| 908 |
+
height: 100% !important;
|
| 909 |
+
min-height: 440px !important;
|
| 910 |
+
max-height: none !important;
|
| 911 |
+
resize: none !important;
|
| 912 |
+
overflow: auto !important;
|
| 913 |
+
border: 1px solid #2a2a2a !important;
|
| 914 |
+
border-radius: 10px !important;
|
| 915 |
+
background:
|
| 916 |
+
radial-gradient(circle at 0 0, rgba(255, 255, 255, 0.02), transparent 52%),
|
| 917 |
+
#070707 !important;
|
| 918 |
+
color: #f0f0f0 !important;
|
| 919 |
+
font-family: "JetBrains Mono", "IBM Plex Mono", "Cascadia Code", "SFMono-Regular", "Fira Code", "Consolas", monospace !important;
|
| 920 |
+
font-size: 12.4px !important;
|
| 921 |
+
line-height: 1.38 !important;
|
| 922 |
+
letter-spacing: 0.01em !important;
|
| 923 |
+
padding: 10px 12px !important;
|
| 924 |
+
box-shadow:
|
| 925 |
+
inset 0 1px 0 rgba(255, 255, 255, 0.03),
|
| 926 |
+
0 12px 24px rgba(0, 0, 0, 0.52) !important;
|
| 927 |
+
scrollbar-width: thin;
|
| 928 |
+
scrollbar-color: #606060 #0a0a0a;
|
| 929 |
+
}
|
| 930 |
+
|
| 931 |
+
#runtime-terminal textarea::-webkit-scrollbar,
|
| 932 |
+
.runtime-terminal textarea::-webkit-scrollbar {
|
| 933 |
+
width: 10px;
|
| 934 |
+
}
|
| 935 |
+
|
| 936 |
+
#runtime-terminal textarea::-webkit-scrollbar-track,
|
| 937 |
+
.runtime-terminal textarea::-webkit-scrollbar-track {
|
| 938 |
+
background: #0b0b0b;
|
| 939 |
+
}
|
| 940 |
+
|
| 941 |
+
#runtime-terminal textarea::-webkit-scrollbar-thumb,
|
| 942 |
+
.runtime-terminal textarea::-webkit-scrollbar-thumb {
|
| 943 |
+
background: linear-gradient(180deg, #5e5e5e, #444444);
|
| 944 |
+
border: 2px solid #0b0b0b;
|
| 945 |
+
border-radius: 8px;
|
| 946 |
+
}
|
| 947 |
+
|
| 948 |
+
#runtime-terminal .gradio-textbox label,
|
| 949 |
+
.runtime-terminal .gradio-textbox label {
|
| 950 |
+
display: none !important;
|
| 951 |
}
|
| 952 |
|
| 953 |
.live-console {
|
|
|
|
| 1773 |
};
|
| 1774 |
|
| 1775 |
const followTerminal = () => {
|
| 1776 |
+
const body = document.querySelector("#runtime-terminal textarea, .runtime-terminal textarea");
|
| 1777 |
if (!body) {
|
| 1778 |
window.setTimeout(followTerminal, 150);
|
| 1779 |
return;
|
|
|
|
| 3181 |
display_text = line
|
| 3182 |
if tone_class == "log-command" and line.startswith("[command]"):
|
| 3183 |
display_text = line.replace("[command]", "", 1).strip()
|
| 3184 |
+
text = display_text if display_text else ""
|
| 3185 |
+
prefix_symbol, _ = runtime_prefix_for_tone(tone_class)
|
| 3186 |
line_no = start_index + idx
|
| 3187 |
+
rows.append(f"{line_no:>6} {prefix_symbol} {text}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3188 |
|
| 3189 |
if not rows:
|
| 3190 |
+
rows.append(" - No log lines available.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3191 |
|
| 3192 |
if training_running:
|
| 3193 |
artifact_state = "live"
|
|
|
|
| 3215 |
elif artifact_load == 0 and line_count > 0:
|
| 3216 |
artifact_load = 8
|
| 3217 |
|
| 3218 |
+
stats = [
|
| 3219 |
+
f"PID={snapshot.get('training_pid') or 'n/a'}",
|
| 3220 |
+
f"EXIT={'n/a' if exit_code is None else exit_code}",
|
| 3221 |
+
f"TAIL={line_count}",
|
| 3222 |
+
f"TOTAL={total_log_lines}",
|
| 3223 |
+
f"ERR={error_count}",
|
| 3224 |
+
f"WARN={warn_count}",
|
| 3225 |
+
f"STREAK={failure_streak}",
|
| 3226 |
+
f"REC={recording_label}",
|
| 3227 |
+
f"ART={recording_coverage}",
|
| 3228 |
+
]
|
| 3229 |
+
if progress:
|
| 3230 |
+
step = int(progress.get("global_step", 0) or 0)
|
| 3231 |
+
max_steps = int(progress.get("max_steps", 0) or 0)
|
| 3232 |
+
step_text = f"{step}/{max_steps}" if max_steps > 0 else str(step)
|
| 3233 |
+
stats.append(f"STEP={step_text}")
|
| 3234 |
+
stats.append(f"EPOCH={format_number(progress.get('epoch'), digits=3)}")
|
| 3235 |
+
stats.append(f"LOSS={format_number(progress_metrics.get('loss'))}")
|
| 3236 |
+
stats.append(f"LR={format_number(progress_metrics.get('learning_rate'), digits=4)}")
|
| 3237 |
+
if bool(progress.get("distill_enabled")):
|
| 3238 |
+
stats.append(f"KD={format_number(progress_metrics.get('distill_kd_loss'))}")
|
| 3239 |
+
stats.append(f"T={format_number(progress_metrics.get('distill_temperature'), digits=3)}")
|
| 3240 |
+
|
| 3241 |
+
header_lines = [
|
| 3242 |
+
"RUNTIME CONSOLE | Real-time native trainer output stream",
|
| 3243 |
+
f"STATE={state_label} | ARTIFACT={artifact_status} | LOAD={artifact_load}% | LOG_AGE={log_age_text} | WRITE_AGE={recording_latest}",
|
| 3244 |
+
f"LOG={compact_log_path} | TRIGGER={snapshot.get('last_trigger') or 'n/a'} | WINDOW={window_text} | AUDIT={audit_event}",
|
| 3245 |
+
" | ".join(stats),
|
| 3246 |
+
"-" * 108,
|
| 3247 |
+
]
|
| 3248 |
+
return "\n".join(header_lines + rows)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3249 |
|
| 3250 |
|
| 3251 |
def latest_log_text(snapshot: dict[str, Any] | None = None) -> str:
|
|
|
|
| 3347 |
)
|
| 3348 |
except Exception as exc:
|
| 3349 |
fallback_log = (
|
| 3350 |
+
"RUNTIME CONSOLE | ERROR\n"
|
| 3351 |
+
"STATE=ATTENTION | ARTIFACT=ERROR\n"
|
| 3352 |
+
f"UI refresh error: {str(exc)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3353 |
)
|
| 3354 |
return (
|
| 3355 |
LIVE_REFRESH_CACHE.get("ops_strip", ""),
|
|
|
|
| 3996 |
"<div class='intel-panel-head'><span>Runtime Console</span>"
|
| 3997 |
"<span class='intel-panel-tag'>Live Feed</span></div>"
|
| 3998 |
)
|
| 3999 |
+
log_box = gr.Textbox(
|
| 4000 |
+
show_label=False,
|
| 4001 |
+
elem_id="runtime-terminal",
|
| 4002 |
+
elem_classes=["runtime-terminal"],
|
| 4003 |
+
lines=26,
|
| 4004 |
+
max_lines=34,
|
| 4005 |
+
interactive=False,
|
| 4006 |
+
)
|
| 4007 |
with gr.Column(scale=1, elem_classes=["side-column", "side-stack"]):
|
| 4008 |
with gr.Accordion("Runtime Details", open=True, elem_id="runtime-details-panel", elem_classes=["side-accordion"]):
|
| 4009 |
detail_box = gr.Markdown(elem_id="runtime-details-scroll", elem_classes=["intel-scroll", "side-scroll"])
|