import sys import types for _mod in ("audioop", "pyaudioop"): if _mod not in sys.modules: sys.modules[_mod] = types.ModuleType(_mod) import streamlit as st import subprocess import os import json import threading import time PLOTS_DIR = "./plots" LOG_FILE = "./training.log" DONE_FILE = "./training_done.txt" PID_FILE = "./training.pid" RESULTS_FILE = f"{PLOTS_DIR}/results.json" TASK_ID_FILE = "./.task_id" # Pre-staged artifacts from a previous successful run — judges see these # without having to actually click "Run Training". Purely additive: if the # files don't exist, nothing renders. SAMPLE_LOG_FILE = "./sample_run.log" SAMPLE_BEFORE_AFTER_PNG = "./sample_before_after.png" SAMPLE_REWARD_PNG = "./sample_reward_curve.png" SAMPLE_RESULTS_JSON = "./sample_results.json" # --------------------------------------------------------------------------- # Trainer hub config — same across all 3 trainer Spaces, the active task # is selected by the contents of `.task_id` at the Space root. # --------------------------------------------------------------------------- TASKS = { "dependency": { "title": "Dependency Review", "subtitle": "Supply-chain literacy", "blurb": "Typosquats, hallucinated PyPI imports, pinned CVEs, license risks. Tests the baseline of supply-chain awareness every reviewer should have.", "stats": "24 scenarios · 120 findings · Qwen 1.5B · 3 epochs", "delta": "+0.302", "deltatxt": "20 / 24 wins · 0.083 → 0.385", "space_url": "https://huggingface.co/spaces/sam25kat/securereview-trainer", }, "iac": { "title": "IaC Misconfiguration", "subtitle": "Cloud-security reasoning", "blurb": "CIS violations in Terraform / K8s — public buckets, wildcard IAM, privileged containers, missing encryption. Multi-file cloud reasoning.", "stats": "24 scenarios · 155 findings · Qwen 1.5B · 3 epochs", "delta": "+0.126", "deltatxt": "6 / 13 wins · 0.177 → 0.303", "space_url": "https://huggingface.co/spaces/sam25kat/securereview-trainer-iac", }, "migration": { "title": "Migration Safety", "subtitle": "Database engineering judgment", "blurb": "SQL migrations against live production context — table sizes, write throughput, downstream services. Hot-row contention, RLS gaps, MVCC bloat.", "stats": "12 curriculum-filtered (of 28) · 155 findings · Qwen 7B 4-bit · 3 epochs", "delta": "+0.295", "deltatxt": "10 / 12 wins · 0.170 → 0.465", "space_url": "https://huggingface.co/spaces/sam25kat/securereview-trainer-migration", }, } TASK_ORDER = ["dependency", "iac", "migration"] def detect_local_task() -> str: """Each trainer Space puts its own task id in /.task_id — defaults to dep.""" if os.path.exists(TASK_ID_FILE): try: t = open(TASK_ID_FILE).read().strip() if t in TASKS: return t except OSError: pass return "dependency" LOCAL_TASK = detect_local_task() # --------------------------------------------------------------------------- def is_training_alive(): if not os.path.exists(PID_FILE): return False try: with open(PID_FILE) as f: pid = int(f.read().strip()) os.kill(pid, 0) return True except (ProcessLookupError, ValueError, PermissionError): return False def _run(): os.makedirs(PLOTS_DIR, exist_ok=True) with open(LOG_FILE, "w", buffering=1) as log: proc = subprocess.Popen( [sys.executable, "train.py"], stdout=log, stderr=subprocess.STDOUT, ) with open(PID_FILE, "w") as f: f.write(str(proc.pid)) proc.wait() with open(DONE_FILE, "w") as f: f.write("done") if os.path.exists(PID_FILE): try: os.remove(PID_FILE) except OSError: pass # --------------------------------------------------------------------------- # Page chrome # --------------------------------------------------------------------------- st.set_page_config(page_title="SecureReview Trainer", layout="wide") st.markdown( """ """, unsafe_allow_html=True, ) st.title("SecureReview — Trainer Hub") st.markdown( "**Three security-review domains. One canonical SFT → GRPO hybrid pipeline.** " "Click *Run Training* on any card — full SFT run completes in ~30 s on a single GPU credit, " "with loss curve + before/after plot rendered live." ) st.markdown("---") # --------------------------------------------------------------------------- # Three task cards # --------------------------------------------------------------------------- cols = st.columns(3, gap="medium") for idx, task_id in enumerate(TASK_ORDER): cfg = TASKS[task_id] is_active = task_id == LOCAL_TASK with cols[idx]: active_cls = "active" if is_active else "" active_badge = 'THIS SPACE' if is_active else "" card_html = f"""