Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import subprocess | |
| import os | |
| import threading | |
| import sys | |
| import time | |
| from pathlib import Path | |
# Files used to track the training run across UI callbacks.
# TRAINING_LOG is (re)written by start_training() and read by get_training_log().
TRAINING_LOG = "/tmp/training.log"
# TRAINING_PID_FILE holds the PID of the training subprocess while it runs.
TRAINING_PID_FILE = "/tmp/training.pid"
def start_training():
    """Start D1337 CIPHER training in a background subprocess.

    The training script (``/app/train.py``) is launched with the current
    interpreter from a worker thread. Its stdout and stderr are appended to
    ``TRAINING_LOG`` and its PID is recorded in ``TRAINING_PID_FILE`` for as
    long as it runs.

    Returns:
        str: A status message for the UI -- either a notice that a run is
        already in progress, or a confirmation that a new run was started.
        Launch errors are not raised to the caller; they are appended to
        ``TRAINING_LOG`` by the worker thread.
    """
    # NOTE(review): the leading symbols in the status strings below look like
    # mis-decoded emoji; they are kept unchanged here -- confirm against the
    # original file.
    pid_file = Path(TRAINING_PID_FILE)

    def remove_pid_file():
        # The file may already be gone (never written, or removed by another
        # call), which is fine.
        try:
            pid_file.unlink()
        except FileNotFoundError:
            pass

    # Check if already running
    if pid_file.exists():
        try:
            pid = int(pid_file.read_text())
            if pid <= 0:
                # os.kill() with 0 or a negative pid targets process groups,
                # not a single process, so such a value cannot be trusted.
                raise ValueError(f"invalid PID in {TRAINING_PID_FILE}: {pid}")
            # Signal 0 performs only the existence check; raises if the
            # process is not running.
            os.kill(pid, 0)
        except (ValueError, OSError):
            # Corrupt PID file or dead process: drop the stale file and
            # fall through to start a fresh run.
            remove_pid_file()
        else:
            return "β οΈ Training already running! Check logs below..."

    # Clear log file
    Path(TRAINING_LOG).write_text("π₯ D1337 CIPHER TRAINING STARTING...\n\n", encoding="utf-8")

    # Use optimized training script for L40S x4
    runner_path = "/app/train.py"

    def run_training():
        """Run the training script to completion, logging any launch error."""
        try:
            # Run training script and redirect output to log
            with open(TRAINING_LOG, "a", encoding="utf-8") as log_file:
                process = subprocess.Popen(
                    [sys.executable, runner_path],
                    stdout=log_file,
                    stderr=subprocess.STDOUT,
                    cwd="/app",
                    env=os.environ.copy(),
                )
                pid_file.write_text(str(process.pid))
                process.wait()
        except Exception as e:
            # Thread boundary: nothing above us can handle the error, so
            # record it in the log the UI displays. Path.write_text() cannot
            # append, hence the explicit open() in append mode.
            with open(TRAINING_LOG, "a", encoding="utf-8") as log_file:
                log_file.write(f"\nβ ERROR: {e}\n")
        finally:
            # Always clear the PID file so a later call can start a new run.
            remove_pid_file()

    # NOTE: the PID file is written by the worker thread after it starts, so
    # two calls in very quick succession can both pass the check above.
    # Non-daemon thread: the interpreter stays alive until training finishes.
    thread = threading.Thread(target=run_training, daemon=False)
    thread.start()

    return "π₯ D1337 CIPHER TRAINING STARTED!\n\nLoading model (31B) - this may take 2-5 minutes...\n\nOutput will appear below automatically (refresh every 3 seconds)."
def get_training_log():
    """Return the current training log text for display in the UI.

    Returns:
        str: The full contents of ``TRAINING_LOG`` when it exists and is not
        blank; a "waiting" placeholder when the file is missing or blank; or
        an ``"Error reading log: ..."`` message if the file cannot be read.
    """
    placeholder = "Waiting for training to start...\n\nClick 'START TRAINING' button above."
    try:
        # The log is appended to by a running subprocess, so a read can land
        # in the middle of a multi-byte character or hit non-UTF-8 output.
        # Replace undecodable bytes instead of failing the whole read.
        content = Path(TRAINING_LOG).read_text(encoding="utf-8", errors="replace")
    except FileNotFoundError:
        return placeholder
    except OSError as e:
        return f"Error reading log: {str(e)}"
    return content if content.strip() else placeholder
# Gradio UI: static run summary, a start button, a log view, and a manual
# refresh button.
with gr.Blocks(title="D1337 CIPHER Training") as demo:
    # Header lines, rendered as separate Markdown components in this order.
    for header_line in (
        "# π₯ D1337 CIPHER C2 V.1 - TRAINING",
        "**Hardware**: L40S x4 (192GB VRAM)",
        "**Base**: Huihui-GLM-4.7-Flash-abliterated (31B)",
        "**Dataset**: 92 samples | **Epochs**: 3 | **4-bit + LoRA**",
    ):
        gr.Markdown(header_line)

    with gr.Row():
        train_btn = gr.Button("π START TRAINING", variant="primary")

    output = gr.Textbox(
        label="Training Output",
        lines=20,
        value="Click 'START TRAINING' to begin...",
    )
    refresh_btn = gr.Button("π Refresh Logs", variant="secondary")

    # Both buttons write their callback's return value into the log textbox.
    train_btn.click(fn=start_training, outputs=output)
    refresh_btn.click(fn=get_training_log, outputs=output)

    # Show the current log as soon as the page is (re)loaded.
    demo.load(fn=get_training_log, outputs=output)

    gr.Markdown("**Expected time: ~5-10 minutes on L40S x4**")

demo.launch(
    server_name="0.0.0.0",
    server_port=7860,
    share=False,
    ssl_verify=False,
    show_error=True,
)