import os
import subprocess
import sys
import threading
import time
from pathlib import Path

import gradio as gr

# Training log file (child stdout/stderr is appended here) and the PID file
# used to detect a training run that is already in progress.
TRAINING_LOG = "/tmp/training.log"
TRAINING_PID_FILE = "/tmp/training.pid"


def _training_is_running(pid_file: Path) -> bool:
    """Return True if *pid_file* names a process that still exists.

    A missing, unreadable or corrupt PID file, or one that names a process
    that no longer exists, is treated as stale: it is removed and False is
    returned.
    """
    try:
        pid = int(pid_file.read_text())
    except (OSError, ValueError):
        pid_file.unlink(missing_ok=True)
        return False

    # os.kill with pid <= 0 addresses process groups rather than a single
    # process, so such a value can never be a valid training PID.
    if pid <= 0:
        pid_file.unlink(missing_ok=True)
        return False

    try:
        os.kill(pid, 0)  # Signal 0: existence/permission probe, nothing is sent
    except ProcessLookupError:
        # Process is gone; the PID file is stale.
        pid_file.unlink(missing_ok=True)
        return False
    except PermissionError:
        # The process exists but belongs to another user: still running.
        return True
    return True


def start_training() -> str:
    """Start D1337 CIPHER training as a subprocess and return a status message.

    If the PID file points at a live process, nothing is started and a
    warning message is returned. Otherwise the log file is reset and
    ``/app/train.py`` is launched with the current interpreter from a
    non-daemon background thread; its stdout and stderr are appended to
    ``TRAINING_LOG``. The PID file is written once the process has started
    and is removed when it exits or fails to start.

    Returns:
        A human-readable status string for the UI output box.
    """
    pid_file = Path(TRAINING_PID_FILE)

    # Check if already running
    if _training_is_running(pid_file):
        return "āš ļø Training already running! Check logs below..."

    # Clear log file
    Path(TRAINING_LOG).write_text("šŸ”„ D1337 CIPHER TRAINING STARTING...\n\n", encoding="utf-8")

    # Use optimized training script for L40S x4
    runner_path = "/app/train.py"

    def run_training():
        """Run the training script to completion, logging any launch error."""
        try:
            # Run training script and redirect output to log
            with open(TRAINING_LOG, "a", encoding="utf-8") as log_file:
                process = subprocess.Popen(
                    [sys.executable, runner_path],
                    stdout=log_file,
                    stderr=subprocess.STDOUT,
                    cwd="/app",
                    env=os.environ.copy(),
                )
                # NOTE: the PID file only appears here, after the process has
                # been spawned, so a second call arriving before this point
                # will not see the run.
                pid_file.write_text(str(process.pid))
                # Wait for completion
                process.wait()
        except Exception as e:
            # Path.write_text() has no append mode, so append through open().
            error_msg = f"\nāŒ ERROR: {str(e)}\n"
            with open(TRAINING_LOG, "a", encoding="utf-8") as log_file:
                log_file.write(error_msg)
        finally:
            # Always drop the PID file so a later start is not blocked.
            pid_file.unlink(missing_ok=True)

    # Run in a background thread; non-daemon so the interpreter does not exit
    # while the thread is still waiting on the training process.
    thread = threading.Thread(target=run_training)
    thread.daemon = False
    thread.start()

    return "šŸ”„ D1337 CIPHER TRAINING STARTED!\n\nLoading model (31B) - this may take 2-5 minutes...\n\nOutput will appear below automatically (refresh every 3 seconds)."
def get_training_log():
    """Return the text to show in the training output box.

    Yields the full contents of ``TRAINING_LOG`` when the file exists and
    holds something other than whitespace, a "waiting" placeholder when it
    does not, and an error description if reading the file fails.
    """
    try:
        log_path = Path(TRAINING_LOG)
        log_text = log_path.read_text(encoding="utf-8") if log_path.exists() else ""
    except Exception as e:
        return f"Error reading log: {str(e)}"

    if log_text.strip():
        return log_text
    return "Waiting for training to start...\n\nClick 'START TRAINING' button above."


# UI
# NOTE(review): the original indentation was lost; the Row is assumed to
# contain only the start button, with the textbox and refresh button placed
# below it. Confirm against the intended layout.
with gr.Blocks(title="D1337 CIPHER Training") as demo:
    # Static header lines, rendered in order.
    for header_line in (
        "# šŸ”„ D1337 CIPHER C2 V.1 - TRAINING",
        "**Hardware**: L40S x4 (192GB VRAM)",
        "**Base**: Huihui-GLM-4.7-Flash-abliterated (31B)",
        "**Dataset**: 92 samples | **Epochs**: 3 | **4-bit + LoRA**",
    ):
        gr.Markdown(header_line)

    with gr.Row():
        train_btn = gr.Button("šŸš€ START TRAINING", variant="primary")

    output = gr.Textbox(
        label="Training Output",
        lines=20,
        value="Click 'START TRAINING' to begin...",
    )

    refresh_btn = gr.Button("šŸ”„ Refresh Logs", variant="secondary")

    # Both buttons write their handler's return value into the output box.
    train_btn.click(fn=start_training, outputs=output)
    refresh_btn.click(fn=get_training_log, outputs=output)

    # Auto-load on page refresh
    demo.load(fn=get_training_log, outputs=output)

    gr.Markdown("**Expected time: ~5-10 minutes on L40S x4**")

launch_options = {
    "server_name": "0.0.0.0",
    "server_port": 7860,
    "share": False,
    "ssl_verify": False,
    "show_error": True,
}
demo.launch(**launch_options)