Enable autonomous-by-default run profile and auto-apply full execution parameters.
Browse files
README.md CHANGED
@@ -33,6 +33,7 @@ Credentials and publish permissions are handled by deployment runtime settings.
 
 ## Runtime controls
 
+- `Autonomous Mode`: enabled by default; applies full-stage training/eval/gate/publish profile automatically.
 - `Run Evaluation After Training`: toggles post-train eval in runtime config.
 - `Enforce Quality Gate`: enables/disables promotion gate checks.
 - `Gate Min pass@1`, `Gate Min pass@k`, `Gate Min Rows`: runtime gate thresholds.
|
app.py CHANGED
@@ -334,6 +334,8 @@ An autonomous training operations console for DeepSeek-Math that runs multi-stag
 4. Run post-training evaluation with pass@k-style sampling and family-level metrics.
 5. Enforce autonomous quality gates before adapter promotion/push.
 6. Stream live terminal telemetry, tactical visualization, and structured run summaries.
+
+Autonomous Mode is enabled by default and applies full-stage execution parameters automatically.
 """
 
 
@@ -368,6 +370,12 @@ TEMPLATE_STAGE_COUNT = max(1, len(TEMPLATE_CFG.get("stages", []) or [None]))
 TEMPLATE_QUALITY_GATE = TEMPLATE_CFG.get("quality_gate", {})
 if not isinstance(TEMPLATE_QUALITY_GATE, dict):
     TEMPLATE_QUALITY_GATE = {}
+TEMPLATE_POST_EVAL = TEMPLATE_CFG.get("post_eval", {})
+if not isinstance(TEMPLATE_POST_EVAL, dict):
+    TEMPLATE_POST_EVAL = {}
+TEMPLATE_HUB = TEMPLATE_CFG.get("hub", {})
+if not isinstance(TEMPLATE_HUB, dict):
+    TEMPLATE_HUB = {}
 _raw_gate_enabled = TEMPLATE_QUALITY_GATE.get("enabled", True)
 if isinstance(_raw_gate_enabled, bool):
     DEFAULT_GATE_ENABLED = _raw_gate_enabled
@@ -376,6 +384,9 @@ else:
 DEFAULT_GATE_MIN_ROWS = max(1, _safe_int(TEMPLATE_QUALITY_GATE.get("min_evaluated_rows"), 120))
 DEFAULT_GATE_MIN_PASS_AT_1 = max(0.0, _safe_float(TEMPLATE_QUALITY_GATE.get("min_pass_at_1"), 0.01))
 DEFAULT_GATE_MIN_PASS_AT_K = max(0.0, _safe_float(TEMPLATE_QUALITY_GATE.get("min_pass_at_k"), 0.06))
+DEFAULT_AUTO_EVAL_K = max(1, _safe_int(TEMPLATE_POST_EVAL.get("k"), 4))
+DEFAULT_AUTO_EVAL_SAMPLES = max(1, _safe_int(TEMPLATE_POST_EVAL.get("max_samples"), 300))
+DEFAULT_AUTO_PUSH_TO_HUB = bool(TEMPLATE_HUB.get("push_to_hub", True))
 
 
 def now_ts() -> str:
@@ -964,6 +975,7 @@ def run_pipeline_core(
     dataset_repo_id: str,
     model_repo_id: str,
     base_model_id: str,
+    autonomous_mode: bool,
     start_stage: int,
     max_stages: int,
     run_eval: bool,
@@ -1007,6 +1019,21 @@ def run_pipeline_core(
     gate_min_rows = int(gate_min_rows)
     gate_min_pass_at_1 = float(gate_min_pass_at_1)
     gate_min_pass_at_k = float(gate_min_pass_at_k)
+
+    if autonomous_mode:
+        stage_start = 1
+        stage_count = TEMPLATE_STAGE_COUNT
+        run_eval = True
+        eval_k = DEFAULT_AUTO_EVAL_K
+        eval_samples = DEFAULT_AUTO_EVAL_SAMPLES
+        enforce_quality_gate = bool(DEFAULT_GATE_ENABLED)
+        gate_min_rows = DEFAULT_GATE_MIN_ROWS
+        gate_min_pass_at_1 = DEFAULT_GATE_MIN_PASS_AT_1
+        gate_min_pass_at_k = DEFAULT_GATE_MIN_PASS_AT_K
+        push_to_hub = bool(DEFAULT_AUTO_PUSH_TO_HUB)
+        force_redownload = False
+        preflight_only = False
+
     if stage_start < 1:
         raise ValueError("Start stage must be >= 1.")
     if stage_start > TEMPLATE_STAGE_COUNT:
@@ -1038,6 +1065,7 @@ def run_pipeline_core(
         "dataset_repo_id": dataset_repo_id,
         "model_repo_id": model_repo_id,
         "base_model_id": base_model_id,
+        "autonomous_mode": bool(autonomous_mode),
         "start_stage": stage_start,
         "max_stages": stage_count,
         "run_eval": bool(run_eval),
@@ -1055,6 +1083,11 @@ def run_pipeline_core(
     )
 
     append_log(log_lines, f"Run {run_label} started.")
+    if autonomous_mode:
+        append_log(
+            log_lines,
+            "Autonomous mode active: full-stage training/eval/gating/publish profile applied.",
+        )
     append_log(
         log_lines,
         f"Runtime: python={runtime['python']} gradio={runtime['gradio']} torch={runtime['torch']} "
@@ -1313,6 +1346,7 @@ def run_pipeline(
     dataset_repo_id: str,
     model_repo_id: str,
     base_model_id: str,
+    autonomous_mode: bool,
     start_stage: int,
     max_stages: int,
     run_eval: bool,
@@ -1330,6 +1364,7 @@ def run_pipeline(
         dataset_repo_id=dataset_repo_id,
         model_repo_id=model_repo_id,
         base_model_id=base_model_id,
+        autonomous_mode=autonomous_mode,
         start_stage=start_stage,
         max_stages=max_stages,
         run_eval=run_eval,
@@ -1366,6 +1401,8 @@ with gr.Blocks(title="Math Conjecture Trainer Space") as demo:
             label="Base Model ID",
             value="deepseek-ai/deepseek-math-v2",
         )
+        with gr.Row():
+            autonomous_mode = gr.Checkbox(label="Autonomous Mode", value=True)
         with gr.Row():
             start_stage = gr.Slider(label="Stage Start", minimum=1, maximum=TEMPLATE_STAGE_COUNT, step=1, value=1)
             max_stages = gr.Slider(
@@ -1427,6 +1464,7 @@ with gr.Blocks(title="Math Conjecture Trainer Space") as demo:
             dataset_repo_id,
             model_repo_id,
             base_model_id,
+            autonomous_mode,
             start_stage,
             max_stages,
             run_eval,