Enable autonomous-by-default run profile and auto-apply full execution parameters.
Browse files
README.md CHANGED
@@ -33,6 +33,7 @@ Credentials and publish permissions are handled by deployment runtime settings.
 
 ## Runtime controls
 
+- `Autonomous Mode`: enabled by default; applies full-stage training/eval/gate/publish profile automatically.
 - `Run Evaluation After Training`: toggles post-train eval in runtime config.
 - `Enforce Quality Gate`: enables/disables promotion gate checks.
 - `Gate Min pass@1`, `Gate Min pass@k`, `Gate Min Rows`: runtime gate thresholds.
|
app.py CHANGED
@@ -334,6 +334,8 @@ An autonomous training operations console for DeepSeek-Math that runs multi-stag
 4. Run post-training evaluation with pass@k-style sampling and family-level metrics.
 5. Enforce autonomous quality gates before adapter promotion/push.
 6. Stream live terminal telemetry, tactical visualization, and structured run summaries.
+
+Autonomous Mode is enabled by default and applies full-stage execution parameters automatically.
 """
 
 
@@ -368,6 +370,12 @@ TEMPLATE_STAGE_COUNT = max(1, len(TEMPLATE_CFG.get("stages", []) or [None]))
 TEMPLATE_QUALITY_GATE = TEMPLATE_CFG.get("quality_gate", {})
 if not isinstance(TEMPLATE_QUALITY_GATE, dict):
     TEMPLATE_QUALITY_GATE = {}
+TEMPLATE_POST_EVAL = TEMPLATE_CFG.get("post_eval", {})
+if not isinstance(TEMPLATE_POST_EVAL, dict):
+    TEMPLATE_POST_EVAL = {}
+TEMPLATE_HUB = TEMPLATE_CFG.get("hub", {})
+if not isinstance(TEMPLATE_HUB, dict):
+    TEMPLATE_HUB = {}
 _raw_gate_enabled = TEMPLATE_QUALITY_GATE.get("enabled", True)
 if isinstance(_raw_gate_enabled, bool):
     DEFAULT_GATE_ENABLED = _raw_gate_enabled
@@ -376,6 +384,9 @@ else:
 DEFAULT_GATE_MIN_ROWS = max(1, _safe_int(TEMPLATE_QUALITY_GATE.get("min_evaluated_rows"), 120))
 DEFAULT_GATE_MIN_PASS_AT_1 = max(0.0, _safe_float(TEMPLATE_QUALITY_GATE.get("min_pass_at_1"), 0.01))
 DEFAULT_GATE_MIN_PASS_AT_K = max(0.0, _safe_float(TEMPLATE_QUALITY_GATE.get("min_pass_at_k"), 0.06))
+DEFAULT_AUTO_EVAL_K = max(1, _safe_int(TEMPLATE_POST_EVAL.get("k"), 4))
+DEFAULT_AUTO_EVAL_SAMPLES = max(1, _safe_int(TEMPLATE_POST_EVAL.get("max_samples"), 300))
+DEFAULT_AUTO_PUSH_TO_HUB = bool(TEMPLATE_HUB.get("push_to_hub", True))
 
 
 def now_ts() -> str:
@@ -964,6 +975,7 @@ def run_pipeline_core(
     dataset_repo_id: str,
     model_repo_id: str,
     base_model_id: str,
+    autonomous_mode: bool,
     start_stage: int,
     max_stages: int,
     run_eval: bool,
@@ -1007,6 +1019,21 @@ def run_pipeline_core(
     gate_min_rows = int(gate_min_rows)
     gate_min_pass_at_1 = float(gate_min_pass_at_1)
     gate_min_pass_at_k = float(gate_min_pass_at_k)
+
+    if autonomous_mode:
+        stage_start = 1
+        stage_count = TEMPLATE_STAGE_COUNT
+        run_eval = True
+        eval_k = DEFAULT_AUTO_EVAL_K
+        eval_samples = DEFAULT_AUTO_EVAL_SAMPLES
+        enforce_quality_gate = bool(DEFAULT_GATE_ENABLED)
+        gate_min_rows = DEFAULT_GATE_MIN_ROWS
+        gate_min_pass_at_1 = DEFAULT_GATE_MIN_PASS_AT_1
+        gate_min_pass_at_k = DEFAULT_GATE_MIN_PASS_AT_K
+        push_to_hub = bool(DEFAULT_AUTO_PUSH_TO_HUB)
+        force_redownload = False
+        preflight_only = False
+
     if stage_start < 1:
         raise ValueError("Start stage must be >= 1.")
     if stage_start > TEMPLATE_STAGE_COUNT:
@@ -1038,6 +1065,7 @@ def run_pipeline_core(
         "dataset_repo_id": dataset_repo_id,
         "model_repo_id": model_repo_id,
         "base_model_id": base_model_id,
+        "autonomous_mode": bool(autonomous_mode),
         "start_stage": stage_start,
         "max_stages": stage_count,
         "run_eval": bool(run_eval),
@@ -1055,6 +1083,11 @@ def run_pipeline_core(
     )
 
     append_log(log_lines, f"Run {run_label} started.")
+    if autonomous_mode:
+        append_log(
+            log_lines,
+            "Autonomous mode active: full-stage training/eval/gating/publish profile applied.",
+        )
     append_log(
         log_lines,
         f"Runtime: python={runtime['python']} gradio={runtime['gradio']} torch={runtime['torch']} "
@@ -1313,6 +1346,7 @@ def run_pipeline(
     dataset_repo_id: str,
     model_repo_id: str,
     base_model_id: str,
+    autonomous_mode: bool,
     start_stage: int,
     max_stages: int,
     run_eval: bool,
@@ -1330,6 +1364,7 @@ def run_pipeline(
         dataset_repo_id=dataset_repo_id,
         model_repo_id=model_repo_id,
         base_model_id=base_model_id,
+        autonomous_mode=autonomous_mode,
         start_stage=start_stage,
         max_stages=max_stages,
         run_eval=run_eval,
@@ -1366,6 +1401,8 @@ with gr.Blocks(title="Math Conjecture Trainer Space") as demo:
             label="Base Model ID",
             value="deepseek-ai/deepseek-math-v2",
         )
+        with gr.Row():
+            autonomous_mode = gr.Checkbox(label="Autonomous Mode", value=True)
         with gr.Row():
             start_stage = gr.Slider(label="Stage Start", minimum=1, maximum=TEMPLATE_STAGE_COUNT, step=1, value=1)
             max_stages = gr.Slider(
@@ -1427,6 +1464,7 @@ with gr.Blocks(title="Math Conjecture Trainer Space") as demo:
             dataset_repo_id,
             model_repo_id,
             base_model_id,
+            autonomous_mode,
             start_stage,
             max_stages,
             run_eval,