# Hugging Face Space page residue ("Spaces: Sleeping" status banner) —
# not part of the program; preserved as a comment so the file stays valid.
| import gradio as gr | |
| import os | |
| import json | |
| import torch | |
| import subprocess | |
| import sys | |
| from dotenv import load_dotenv | |
| import logging | |
| import threading | |
| # Configure logging | |
| logging.basicConfig( | |
| level=logging.INFO, | |
| format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', | |
| handlers=[ | |
| logging.StreamHandler(), | |
| logging.FileHandler("app.log") | |
| ] | |
| ) | |
| logger = logging.getLogger(__name__) | |
# --- Environment & paths -----------------------------------------------------
# Pull variables from a local .env file into os.environ (no-op if absent).
load_dotenv()

# Resolve everything relative to this file, not the process CWD — important
# because Hugging Face Spaces may launch the app from a different directory.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
BASE_DIR = os.path.abspath(os.path.join(SCRIPT_DIR, "."))
# Load config file
def load_config(config_path="transformers_config.json"):
    """Read a JSON configuration file located under BASE_DIR.

    Args:
        config_path: File name (or relative path) of the JSON config,
            resolved against BASE_DIR.

    Returns:
        dict: The parsed configuration, or {} when the file is missing,
        unreadable, or contains invalid JSON (the error is logged, never
        raised, so the dashboard can still start with defaults).
    """
    config_path = os.path.join(BASE_DIR, config_path)
    try:
        # Explicit encoding: config files are authored as UTF-8.
        with open(config_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, json.JSONDecodeError) as e:
        # Only I/O and parse failures are expected here; narrower than the
        # previous blanket `Exception` so genuine bugs still surface.
        logger.error("Error loading config: %s", e)
        return {}
# --- Configuration -----------------------------------------------------------
# Parse transformers_config.json once at startup; missing keys fall back to
# the defaults below.
config = load_config()
model_config = config.get("model_config", {})

# Model details from config
MODEL_NAME = model_config.get(
    "model_name_or_path",
    "unsloth/DeepSeek-R1-Distill-Qwen-14B-unsloth-bnb-4bit",
)
SPACE_NAME = os.getenv("HF_SPACE_NAME", "phi4training")
# Function to run training in a thread and stream output to container logs
def run_training():
    """Run the training script and stream its output to container logs.

    Launches run_cloud_training.py (located under BASE_DIR) as a child
    process and mirrors its combined stdout/stderr to this process's
    stdout so it appears in the Space's container logs.

    Returns:
        int | None: The child's exit code, or None when the script could
        not be found (the problem is printed to the logs, not raised).
    """
    # Locate training script using absolute path
    training_script = os.path.join(BASE_DIR, "run_cloud_training.py")

    # Fail loudly in the logs — but without raising — if the script is absent,
    # and dump the directory listing to make the misconfiguration obvious.
    if not os.path.exists(training_script):
        print(f"ERROR: Training script not found at: {training_script}")
        print(f"Current directory: {os.getcwd()}")
        print("Available files:")
        for file in os.listdir(BASE_DIR):
            print(f" - {file}")
        return None

    print(f"Found training script at: {training_script}")

    # `with` guarantees the pipe is closed and the child is reaped (no zombie
    # process) even if streaming raises — the original leaked both.
    # bufsize=1 + text mode => line-buffered streaming.
    with subprocess.Popen(
        [sys.executable, training_script],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
        bufsize=1,
    ) as process:
        # Stream output directly to sys.stdout (container logs)
        for line in process.stdout:
            sys.stdout.write(line)
            sys.stdout.flush()

    return process.returncode
# Function to start the training process
def start_training():
    """Kick off fine-tuning in a background daemon thread.

    Returns:
        str: A markdown status message for the Gradio UI — monitoring
        instructions on success, or an error description if the thread
        could not be started.
    """
    try:
        # Print directly to container logs
        print("\n===== STARTING TRAINING PROCESS =====\n")
        print(f"Model: {MODEL_NAME}")
        print(f"Base directory: {BASE_DIR}")
        print(f"Current working directory: {os.getcwd()}")
        print("Training with configuration from transformers_config.json")
        print("Training logs will appear below:")
        print("=" * 50)

        # Run the long-lived training job off the request thread; daemon=True
        # lets the thread die with the app instead of blocking shutdown.
        worker = threading.Thread(target=run_training, daemon=True)
        worker.start()

        logger.info("Training started in background thread")

        status_message = """
    ✅ Training process initiated!

    The model is now being fine-tuned in the background.

    To monitor progress:
    1. Check the Hugging Face space logs in the "Logs" tab
    2. You should see training output appearing directly in the logs
    3. The process will continue running in the background

    NOTE: This is a research training phase only, no model outputs will be available.
    """
        return status_message
    except Exception as e:
        # Boundary handler: surface the failure to the UI instead of crashing.
        logger.error(f"Error starting training: {str(e)}")
        return f"❌ Error starting training: {str(e)}"
# Create Gradio interface - training status only, no model outputs
# NOTE(review): this is a status dashboard only — there is deliberately no
# inference endpoint; the model is never loaded in this process.
with gr.Blocks(css="footer {visibility: hidden}") as demo:
    gr.Markdown(f"# {SPACE_NAME}: Research Training Dashboard")

    with gr.Row():
        with gr.Column():
            # Static dashboard text; config values are interpolated once at
            # startup from transformers_config.json (defaults if keys missing).
            status = gr.Markdown(
                f"""
            ## DeepSeek-R1-Distill-Qwen-14B-unsloth-bnb-4bit Training Dashboard

            **Model**: {MODEL_NAME}
            **Dataset**: phi4-cognitive-dataset

            This is a multidisciplinary research training phase. The model is not available for interactive use.

            ### Training Configuration:
            - **Epochs**: {config.get("training_config", {}).get("num_train_epochs", 3)}
            - **Batch Size**: {config.get("training_config", {}).get("per_device_train_batch_size", 2)}
            - **Gradient Accumulation Steps**: {config.get("training_config", {}).get("gradient_accumulation_steps", 4)}
            - **Learning Rate**: {config.get("training_config", {}).get("learning_rate", 2e-5)}
            - **Max Sequence Length**: {config.get("training_config", {}).get("max_seq_length", 2048)}

            ⚠️ **NOTE**: This space does not provide model outputs during the research training phase.
            All logs are available in the Hugging Face "Logs" tab.
            """
            )

    with gr.Row():
        # Add button for starting training
        start_btn = gr.Button("Start Training", variant="primary")

    # Output area for training start messages
    training_output = gr.Markdown("")

    # Connect start button to function: start_training returns a markdown
    # status string that is rendered into training_output.
    start_btn.click(start_training, outputs=training_output)

    gr.Markdown("""
    ### Research Training Information
    This model is being fine-tuned on research-focused datasets and is not available for interactive querying.
    The training process will run in the background and logs will be available in the Hugging Face UI.

    #### Instructions
    1. Click "Start Training" to begin the fine-tuning process
    2. Monitor progress in the Hugging Face "Logs" tab
    3. Training metrics and results will be saved to the output directory

    #### About This Project
    The model is being fine-tuned on the phi4-cognitive-dataset with a focus on research capabilities.
    This training phase does not include any interactive features or output generation.
    """)
# Launch the interface
if __name__ == "__main__":
    # Start Gradio with minimal features
    print("\n===== RESEARCH TRAINING DASHBOARD STARTED =====\n")
    print(f"Base directory: {BASE_DIR}")
    print(f"Current working directory: {os.getcwd()}")

    # Dump the directory listing so the container logs show exactly what
    # files shipped with the Space.
    print("Available files:")
    for entry in os.listdir(BASE_DIR):
        print(f" - {entry}")

    print("\nClick 'Start Training' to begin the fine-tuning process")
    print("All training output will appear in these logs")

    logger.info("Starting research training dashboard")
    # share=False: no public Gradio tunnel — the Space itself is the URL.
    demo.launch(share=False)