import gradio as gr
import subprocess
import os
import threading
import sys
import time
from pathlib import Path
# Training log file: start_training() redirects the training subprocess's
# stdout/stderr into it, and get_training_log() reads it back for the UI.
TRAINING_LOG = "/tmp/training.log"
# PID file: start_training() writes the training subprocess's PID here, checks
# it to avoid launching a second run, and removes it once the process exits.
TRAINING_PID_FILE = "/tmp/training.pid"
def _pid_is_alive(pid):
    """Return True if a process with the given PID currently exists."""
    # PIDs <= 0 address process groups in kill(2); never probe those.
    if pid <= 0:
        return False
    try:
        os.kill(pid, 0)  # Signal 0 performs the existence check only.
    except PermissionError:
        # The process exists but belongs to another user.
        return True
    except (OSError, OverflowError):
        # ProcessLookupError (no such process) or an out-of-range PID.
        return False
    return True


def _remove_pid_file():
    """Delete the PID file, tolerating the case where it is already gone."""
    try:
        Path(TRAINING_PID_FILE).unlink()
    except FileNotFoundError:
        pass


def start_training():
    """Start D1337 CIPHER training - SUBPROCESS VERSION (MORE RELIABLE)

    Launches ``/app/train.py`` with the current interpreter in a background
    thread, redirecting the script's stdout and stderr into ``TRAINING_LOG``.
    The subprocess PID is stored in ``TRAINING_PID_FILE`` while it runs and
    the file is removed when the process exits or fails to start.

    Returns:
        str: A status message for the UI - either a warning that a training
        run is already in progress, or a confirmation that one was started.
    """
    pid_file = Path(TRAINING_PID_FILE)

    # Check if already running
    if pid_file.exists():
        try:
            pid = int(pid_file.read_text().strip())
        except (OSError, ValueError):
            # Unreadable or corrupt PID file: treat it as stale.
            pid = None
        if pid is not None and _pid_is_alive(pid):
            return "⚠️ Training already running! Check logs below..."
        # Process dead (or PID file unusable), remove PID file
        _remove_pid_file()

    # Clear log file
    Path(TRAINING_LOG).write_text(
        "🔥 D1337 CIPHER TRAINING STARTING...\n\n", encoding="utf-8"
    )

    # Use optimized training script for L40S x4
    runner_path = "/app/train.py"

    def run_training():
        """Run the training script to completion, logging any launch error."""
        try:
            # Run training script and redirect output to log
            with open(TRAINING_LOG, "a", encoding="utf-8") as log_file:
                process = subprocess.Popen(
                    [sys.executable, runner_path],
                    stdout=log_file,
                    stderr=subprocess.STDOUT,
                    cwd="/app",
                    env=os.environ.copy(),
                )
                # Write PID
                pid_file.write_text(str(process.pid))
                # Wait for completion
                process.wait()
        except Exception as e:
            error_msg = f"\n❌ ERROR: {str(e)}\n"
            # Path.write_text() has no append mode, so open the file
            # explicitly to add the error after whatever was already logged.
            try:
                with open(TRAINING_LOG, "a", encoding="utf-8") as log_file:
                    log_file.write(error_msg)
            except OSError as log_error:
                # The log itself is unwritable; stderr is the last resort.
                print(error_msg, log_error, file=sys.stderr)
        finally:
            # Remove PID file on every exit path so a later start is not
            # blocked by a stale entry.
            _remove_pid_file()

    # Run in background thread (non-daemon so the run is not cut short when
    # the main thread finishes).
    thread = threading.Thread(target=run_training, daemon=False)
    thread.start()

    return "🔥 D1337 CIPHER TRAINING STARTED!\n\nLoading model (31B) - this may take 2-5 minutes...\n\nOutput will appear below automatically (refresh every 3 seconds)."
def get_training_log():
    """Get latest training log

    Returns:
        str: The full contents of ``TRAINING_LOG`` when the file exists and
        contains non-whitespace text; otherwise a placeholder prompting the
        user to start training. If the file cannot be read, an
        ``"Error reading log: ..."`` message is returned instead of raising.
    """
    try:
        log_path = Path(TRAINING_LOG)
        if log_path.exists():
            # The training subprocess appends to this file while it is being
            # read, so a read may end in the middle of a multi-byte UTF-8
            # sequence (or contain non-UTF-8 output). Decode leniently rather
            # than replacing the whole log with a decode error.
            content = log_path.read_text(encoding="utf-8", errors="replace")
            if content.strip():
                return content
        return "Waiting for training to start...\n\nClick 'START TRAINING' button above."
    except OSError as e:
        return f"Error reading log: {str(e)}"
# UI: a single page with a start button, a log viewer and a manual refresh
# button. Components and event listeners are declared inside the Blocks
# context so they are attached to this app.
with gr.Blocks(title="D1337 CIPHER Training") as demo:
    gr.Markdown("# 🔥 D1337 CIPHER C2 V.1 - TRAINING")
    gr.Markdown("**Hardware**: L40S x4 (192GB VRAM)")
    gr.Markdown("**Base**: Huihui-GLM-4.7-Flash-abliterated (31B)")
    gr.Markdown("**Dataset**: 92 samples | **Epochs**: 3 | **4-bit + LoRA**")

    with gr.Row():
        train_btn = gr.Button("🚀 START TRAINING", variant="primary")

    output = gr.Textbox(label="Training Output", lines=20, value="Click 'START TRAINING' to begin...")
    refresh_btn = gr.Button("🔄 Refresh Logs", variant="secondary")

    # Both buttons write their handler's return string into the log textbox.
    train_btn.click(start_training, outputs=output)
    refresh_btn.click(get_training_log, outputs=output)

    # Auto-load on page refresh
    demo.load(fn=get_training_log, outputs=output)

    gr.Markdown("**Expected time: ~5-10 minutes on L40S x4**")

# Listen on all interfaces on port 7860 without creating a public share link.
demo.launch(server_name="0.0.0.0", server_port=7860, share=False, ssl_verify=False, show_error=True)