File size: 2,772 Bytes
b41a704
e261fbe
 
 
b41a704
e261fbe
 
b41a704
e261fbe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import os
import subprocess
import sys
from pathlib import Path

import gradio as gr

def run_training():
    """Run the fine-tuning script as a subprocess and stream its output.

    Yields:
        str: The accumulated console output after each new line, so a
        Gradio component can display live progress. The final yield is
        suffixed with a success or failure summary.
    """
    chunks = ["Starting training...\n\n"]
    yield chunks[0]

    # sys.executable guarantees the same interpreter running this app;
    # a bare "python" may be absent (python3-only systems) or point at
    # a different environment without the training dependencies.
    process = subprocess.Popen(
        [sys.executable, "finetune.py"],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # interleave errors with normal output
        text=True,
        bufsize=1,  # line-buffered so lines arrive as they are printed
    )

    try:
        # Stream output line by line as the trainer produces it.
        # Accumulate in a list and join to avoid quadratic string +=.
        for line in process.stdout:
            chunks.append(line)
            yield "".join(chunks)
    finally:
        # Close the pipe and reap the child even if the consumer of this
        # generator stops iterating early.
        process.stdout.close()
        process.wait()

    if process.returncode == 0:
        chunks.append("\n\nβœ… Training completed successfully!")
        chunks.append(f"\n\nModel saved to: {os.path.abspath('./qwen-codeforces-cots')}")
    else:
        chunks.append(f"\n\n❌ Training failed with exit code {process.returncode}")

    yield "".join(chunks)

def check_gpu():
    """Return a one-line human-readable summary of CUDA availability."""
    import torch

    # Guard clause: no CUDA device means training falls back to CPU.
    if not torch.cuda.is_available():
        return "❌ No GPU available - training will be slow!"

    device_name = torch.cuda.get_device_name(0)
    total_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    return f"βœ… GPU Available: {device_name} ({total_gb:.1f} GB)"

# ---------------------------------------------------------------------------
# Gradio UI: single-page dashboard showing GPU status, the training
# configuration, and a button that launches run_training and streams its
# console output into a textbox.
# ---------------------------------------------------------------------------
with gr.Blocks(title="Qwen3 Fine-tuning on Codeforces") as demo:
    gr.Markdown("""
    # πŸš€ Qwen3-0.5B Fine-tuning on Codeforces CoTs

    Fine-tuning Qwen3-0.5B-Instruct on competitive programming problems with chain-of-thought reasoning.

    **Dataset**: open-r1/codeforces-cots (~48K examples)
    **Method**: QLoRA (LoRA + 4-bit quantization)
    **Training**: 1000 steps with checkpoints every 200 steps
    """)

    # GPU probe runs once, at app construction time (not on page load).
    gpu_status = gr.Textbox(label="GPU Status", value=check_gpu(), interactive=False)

    gr.Markdown("### Training Configuration")
    gr.Markdown("""
    - **Model**: Qwen/Qwen2.5-0.5B-Instruct
    - **Batch Size**: 1 (with gradient accumulation of 16)
    - **Learning Rate**: 2e-4
    - **Max Steps**: 1000
    - **LoRA Rank**: 16
    - **Trainable Parameters**: ~8.8M (1.75% of total)
    """)

    start_btn = gr.Button("🎯 Start Training", variant="primary", size="lg")
    output = gr.Textbox(
        label="Training Output",
        lines=25,
        max_lines=50,
        show_copy_button=True
    )

    # run_training is a generator, so Gradio streams each yielded string
    # into `output`, giving a live console view of the training run.
    start_btn.click(
        fn=run_training,
        inputs=[],
        outputs=[output]
    )

    gr.Markdown("""
    ### πŸ“ Notes
    - Training will take several hours depending on GPU speed
    - Checkpoints are saved every 200 steps to `./qwen-codeforces-cots/`
    - You can download the final model after training completes
    - The model will be compatible with the base Qwen2.5-0.5B-Instruct architecture
    """)

# Launch the local Gradio server only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()