Spaces:

mlopez6132
/

nano-coder-free

Runtime error

App Files Files Community

mlopez6132 committed on Jul 20

Commit

9d2bb4c

verified ·

1 Parent(s): 4f7f312

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +196 -0

app.py ADDED Viewed

	@@ -0,0 +1,196 @@

+"""
+Hugging Face Space App for Free H200 Training
+This app runs nano-coder training on HF's free H200 GPU (4 minutes daily)
+"""
+import os
+import subprocess
+import time
+import gradio as gr
+from datetime import datetime, timedelta
+# Configuration
+# Wall-clock budget for the training subprocess, in seconds; run_training()
+# compares it against a time.time() difference.
+MAX_TRAINING_TIME = 3.5 * 60  # 3.5 minutes to be safe
+# Script that run_training() launches with "python" as the training subprocess.
+TRAINING_SCRIPT = "hf_free_training.py"
+# Script that run_training() runs first when data/python-codes-25k/train.bin
+# does not exist yet.
+DATA_PREP_SCRIPT = "prepare_code_dataset.py"
+def check_daily_limit():
+    """Check if we've used today's free H200 time."""
+    today = datetime.now().date()
+    limit_file = f"daily_limit_{today}.txt"
+    if os.path.exists(limit_file):
+        with open(limit_file, 'r') as f:
+            last_run = f.read().strip()
+        if last_run == str(today):
+            return False, "Daily H200 limit reached. Try again tomorrow!"
+    return True, "Ready to train!"
+def mark_daily_usage():
+    """Mark that we've used today's free time."""
+    today = datetime.now().date()
+    limit_file = f"daily_limit_{today}.txt"
+    with open(limit_file, 'w') as f:
+        f.write(str(today))
+def run_training():
+    """Run the free H200 training."""
+    # Check daily limit
+    can_run, message = check_daily_limit()
+    if not can_run:
+        return message
+    try:
+        # Mark usage
+        mark_daily_usage()
+        # Prepare dataset if not already done
+        if not os.path.exists("data/python-codes-25k/train.bin"):
+            print("Preparing dataset...")
+            subprocess.run(["python", DATA_PREP_SCRIPT], check=True)
+        # Run training
+        print("Starting free H200 training...")
+        start_time = time.time()
+        # Run training with timeout
+        process = subprocess.Popen(
+            ["python", TRAINING_SCRIPT],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            universal_newlines=True
+        )
+        output_lines = []
+        while True:
+            elapsed = time.time() - start_time
+            if elapsed > MAX_TRAINING_TIME:
+                process.terminate()
+                output_lines.append(f"\n⏰ Time limit reached ({elapsed/60:.1f} minutes)")
+                break
+            line = process.stdout.readline()
+            if not line and process.poll() is not None:
+                break
+            if line:
+                output_lines.append(line.strip())
+                print(line.strip())
+        # Wait for process to finish
+        process.wait()
+        # Check if training completed successfully
+        if process.returncode == 0:
+            result = "✅ Training completed successfully!\n\n" + "\n".join(output_lines[-20:])  # Last 20 lines
+        else:
+            result = "❌ Training failed or was interrupted.\n\n" + "\n".join(output_lines[-20:])
+        return result
+    except Exception as e:
+        return f"❌ Error during training: {str(e)}"
+def check_model_status():
+    """Check if trained model exists."""
+    model_path = "out-nano-coder-free/ckpt.pt"
+    if os.path.exists(model_path):
+        # Get file size
+        size = os.path.getsize(model_path) / (1024 * 1024)  # MB
+        return f"✅ Model found! Size: {size:.1f} MB"
+    else:
+        return "❌ No trained model found. Run training first."
+def generate_sample_code(prompt, max_tokens=100, temperature=0.8):
+    """Generate code using the trained model."""
+    if not os.path.exists("out-nano-coder-free/ckpt.pt"):
+        return "❌ No trained model found. Please run training first."
+    try:
+        # Import and run sampling
+        from sample_nano_coder import load_model, load_vocab, generate_code
+        model, checkpoint = load_model()
+        stoi, itos = load_vocab()
+        # Generate code
+        completion = generate_code(model, stoi, itos, prompt, max_tokens, temperature, 200)
+        return f"Generated code:\n\n{completion}"
+    except Exception as e:
+        return f"❌ Error generating code: {str(e)}"
+# Create Gradio interface
+# Builds the whole UI at import time; `demo` is the module-level Blocks object
+# that the __main__ guard at the bottom launches.
+with gr.Blocks(title="Nano-Coder Free H200 Training") as demo:
+    gr.Markdown("# 🚀 Nano-Coder Free H200 Training")
+    gr.Markdown("Train a nanoGPT model for Python code generation using Hugging Face's free H200 GPU (4 minutes daily)")
+    # First row: training control on the left, model status on the right.
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown("### 🎯 Training Control")
+            train_button = gr.Button("🚀 Start Free H200 Training", variant="primary")
+            status_text = gr.Textbox(label="Training Status", lines=10, interactive=False)
+        with gr.Column():
+            gr.Markdown("### 📊 Model Status")
+            model_status_button = gr.Button("🔍 Check Model Status")
+            model_status_text = gr.Textbox(label="Model Status", lines=2, interactive=False)
+    # Second row: prompt, sampling controls, and the generated output.
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown("### 🎨 Code Generation")
+            code_prompt = gr.Textbox(
+                label="Code Prompt",
+                placeholder="def fibonacci(n):\n    ",
+                lines=3
+            )
+            with gr.Row():
+                # Positional Slider arguments are (minimum, maximum, initial value).
+                max_tokens = gr.Slider(50, 500, 100, label="Max Tokens")
+                temperature = gr.Slider(0.1, 2.0, 0.8, label="Temperature")
+            generate_button = gr.Button("✨ Generate Code")
+            generated_code = gr.Textbox(label="Generated Code", lines=10, interactive=False)
+    # Event handlers
+    # run_training takes no arguments, so no inputs are wired; its returned
+    # string fills the status box when the call finishes.
+    train_button.click(
+        fn=run_training,
+        outputs=status_text
+    )
+    model_status_button.click(
+        fn=check_model_status,
+        outputs=model_status_text
+    )
+    # Inputs are passed positionally as (prompt, max_tokens, temperature).
+    generate_button.click(
+        fn=generate_sample_code,
+        inputs=[code_prompt, max_tokens, temperature],
+        outputs=generated_code
+    )
+    # NOTE(review): the model configuration and the "saved to HF Hub" claim in
+    # the text below describe the training script; nothing in this file sets
+    # those values or uploads anything - verify against hf_free_training.py.
+    gr.Markdown("""
+    ### 📋 Instructions
+    1. **Daily Limit**: You get 4 minutes of free H200 GPU time per day
+    2. **Training**: Click "Start Free H200 Training" to begin
+    3. **Model**: Check model status after training
+    4. **Generation**: Use the trained model to generate Python code
+    ### ⚙️ Model Configuration (Free Tier)
+    - **Layers**: 6 (reduced from 12)
+    - **Heads**: 6 (reduced from 12)
+    - **Embedding**: 384 (reduced from 768)
+    - **Context**: 512 tokens
+    - **Parameters**: ~15M (vs 124M full model)
+    ### 💡 Tips
+    - Training automatically stops at 3.5 minutes to be safe
+    - Model checkpoints are saved to HF Hub
+    - Use shorter prompts for better results
+    """)
+# Launch the app only when this file is executed as a script.
+if __name__ == "__main__":
+    demo.launch()