# Desorden1337
# Fix: use dtype= like official example for GLM model
# 3e8f1b5
import gradio as gr
import subprocess
import os
import threading
import sys
import time
from pathlib import Path
# Filesystem locations shared by the training launcher and the log viewer.
# Training log file: the training subprocess's stdout and stderr are appended here.
TRAINING_LOG = "/tmp/training.log"
# Holds the PID of the training subprocess while it runs; removed when it exits.
TRAINING_PID_FILE = "/tmp/training.pid"
def _remove_if_present(path):
    """Delete *path*, tolerating the case where it is already gone."""
    try:
        path.unlink()
    except FileNotFoundError:
        pass


def _pid_file_is_live(pid_file):
    """Return True when *pid_file* names a process that can be signalled.

    A missing file means no run is active. A malformed or non-positive PID,
    or a PID that cannot be signalled, marks the file as stale: it is removed
    and False is returned.
    """
    try:
        pid = int(pid_file.read_text())
        if pid <= 0:
            # 0 and negative values address process groups in os.kill(), so
            # they would "succeed" without identifying a training process.
            raise ValueError(f"invalid PID: {pid}")
        os.kill(pid, 0)  # Signal 0 only checks; raises if the PID is not running.
    except FileNotFoundError:
        return False
    except (ValueError, OverflowError, OSError):
        _remove_if_present(pid_file)
        return False
    return True


def start_training(runner_path="/app/train.py", workdir="/app"):
    """Start D1337 CIPHER training - SUBPROCESS VERSION (MORE RELIABLE).

    The training script runs as a child process with stdout and stderr
    appended to TRAINING_LOG. Its PID is stored in TRAINING_PID_FILE while it
    runs so that a second click does not launch a second run.

    Args:
        runner_path: Script executed with the current Python interpreter.
        workdir: Working directory for the child process.

    Returns:
        A status message for the UI: a warning when a run is already active,
        otherwise a confirmation that training was started.
    """
    pid_file = Path(TRAINING_PID_FILE)

    # Check if already running
    if _pid_file_is_live(pid_file):
        return "⚠️ Training already running! Check logs below..."

    # Clear log file so the viewer only shows output from this run
    Path(TRAINING_LOG).write_text("πŸ”₯ D1337 CIPHER TRAINING STARTING...\n\n", encoding="utf-8")

    def run_training():
        """Launch the child process, wait for it, and always drop the PID file."""
        try:
            # Run training script and redirect output to log
            with open(TRAINING_LOG, "a", encoding="utf-8") as log_file:
                process = subprocess.Popen(
                    [sys.executable, runner_path],
                    stdout=log_file,
                    stderr=subprocess.STDOUT,
                    cwd=workdir,
                    env=os.environ.copy(),
                )
                pid_file.write_text(str(process.pid))
                process.wait()
        except Exception as e:
            # Append (not overwrite) so earlier output stays visible.
            # Path.write_text() has no append mode, hence the explicit open().
            with open(TRAINING_LOG, "a", encoding="utf-8") as log_file:
                log_file.write(f"\n❌ ERROR: {str(e)}\n")
        finally:
            _remove_if_present(pid_file)

    # Run in background thread so the UI callback returns immediately.
    # NOTE(review): the PID file is written from this thread, so two clicks in
    # very quick succession could both pass the check above - confirm whether
    # that matters for this deployment.
    thread = threading.Thread(target=run_training)
    thread.daemon = False
    thread.start()
    return "πŸ”₯ D1337 CIPHER TRAINING STARTED!\n\nLoading model (31B) - this may take 2-5 minutes...\n\nOutput will appear below automatically (refresh every 3 seconds)."
def get_training_log():
    """Get latest training log.

    Returns:
        The full text of TRAINING_LOG when it has non-blank content, a
        placeholder prompt when the file is missing or blank, or an
        "Error reading log: ..." message if the file cannot be read.
    """
    try:
        # The log is appended to by a running subprocess, so a read can land
        # in the middle of a multi-byte character or meet non-UTF-8 output.
        # Replace undecodable bytes instead of failing the whole read.
        content = Path(TRAINING_LOG).read_text(encoding="utf-8", errors="replace")
    except FileNotFoundError:
        content = ""
    except Exception as e:
        return f"Error reading log: {str(e)}"

    if content.strip():
        return content
    return "Waiting for training to start...\n\nClick 'START TRAINING' button above."
# UI
# Page layout: a static banner, the start button in its own row, a log
# textbox, and a manual refresh button. The log is also loaded once whenever
# the page is opened.
with gr.Blocks(title="D1337 CIPHER Training") as demo:
    # Banner: one Markdown component per entry, rendered top to bottom.
    for banner_text in (
        "# πŸ”₯ D1337 CIPHER C2 V.1 - TRAINING",
        "**Hardware**: L40S x4 (192GB VRAM)",
        "**Base**: Huihui-GLM-4.7-Flash-abliterated (31B)",
        "**Dataset**: 92 samples | **Epochs**: 3 | **4-bit + LoRA**",
    ):
        gr.Markdown(banner_text)

    with gr.Row():
        train_btn = gr.Button("πŸš€ START TRAINING", variant="primary")

    output = gr.Textbox(
        label="Training Output",
        lines=20,
        value="Click 'START TRAINING' to begin...",
    )
    refresh_btn = gr.Button("πŸ”„ Refresh Logs", variant="secondary")

    # Both buttons write their handler's return value into the same textbox.
    train_btn.click(fn=start_training, outputs=output)
    refresh_btn.click(fn=get_training_log, outputs=output)

    # Auto-load on page refresh
    demo.load(fn=get_training_log, outputs=output)

    gr.Markdown("**Expected time: ~5-10 minutes on L40S x4**")

# Listen on all interfaces on port 7860, without a public share link.
demo.launch(
    server_name="0.0.0.0",
    server_port=7860,
    share=False,
    ssl_verify=False,
    show_error=True,
)