# Hugging Face Space app (page status at capture time: Sleeping)
import os
import subprocess
import sys
from pathlib import Path

import gradio as gr
def run_training():
    """Run the fine-tuning script as a subprocess and stream its output.

    Yields:
        str: The full accumulated console output, re-yielded after every
        new line so a streaming UI component (e.g. a Gradio Textbox bound
        to this generator) can live-update as training progresses.
    """
    output_text = "Starting training...\n\n"
    yield output_text

    # Use the interpreter running this app rather than whatever "python"
    # resolves to on PATH, so the subprocess shares the same venv/deps.
    process = subprocess.Popen(
        [sys.executable, "finetune.py"],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # interleave stderr into the same stream
        text=True,
        bufsize=1,  # line-buffered: lines arrive as soon as they are printed
    )

    # Stream output line by line, re-yielding the growing transcript.
    for line in process.stdout:
        output_text += line
        yield output_text

    process.wait()
    if process.returncode == 0:
        output_text += "\n\nβ Training completed successfully!"
        output_text += f"\n\nModel saved to: {os.path.abspath('./qwen-codeforces-cots')}"
    else:
        output_text += f"\n\nβ Training failed with exit code {process.returncode}"
    yield output_text
def check_gpu():
    """Return a one-line human-readable summary of CUDA availability.

    Includes the device name and total memory (GB) when a GPU is present.
    """
    # Imported lazily so merely building the UI doesn't require torch
    # to be loaded up front.
    import torch

    if not torch.cuda.is_available():
        return "β No GPU available - training will be slow!"

    device_name = torch.cuda.get_device_name(0)
    total_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    return f"β GPU Available: {device_name} ({total_gb:.1f} GB)"
# ---------------------------------------------------------------------------
# Gradio UI: a single-page dashboard showing GPU status and the training
# configuration, with a button that launches finetune.py via run_training()
# and streams its console output into a textbox.
# ---------------------------------------------------------------------------
# NOTE(review): the page title/header say "Qwen3" but the configuration
# below lists Qwen/Qwen2.5-0.5B-Instruct — confirm which model is intended.
with gr.Blocks(title="Qwen3 Fine-tuning on Codeforces") as demo:
    gr.Markdown("""
# π Qwen3-0.5B Fine-tuning on Codeforces CoTs
Fine-tuning Qwen3-0.5B-Instruct on competitive programming problems with chain-of-thought reasoning.
**Dataset**: open-r1/codeforces-cots (~48K examples)
**Method**: QLoRA (LoRA + 4-bit quantization)
**Training**: 1000 steps with checkpoints every 200 steps
""")
    # GPU status is computed once when the UI is built, not refreshed live.
    gpu_status = gr.Textbox(label="GPU Status", value=check_gpu(), interactive=False)
    gr.Markdown("### Training Configuration")
    gr.Markdown("""
- **Model**: Qwen/Qwen2.5-0.5B-Instruct
- **Batch Size**: 1 (with gradient accumulation of 16)
- **Learning Rate**: 2e-4
- **Max Steps**: 1000
- **LoRA Rank**: 16
- **Trainable Parameters**: ~8.8M (1.75% of total)
""")
    start_btn = gr.Button("π― Start Training", variant="primary", size="lg")
    # Target textbox for the streamed training log.
    output = gr.Textbox(
        label="Training Output",
        lines=25,
        max_lines=50,
        show_copy_button=True
    )
    # run_training is a generator, so each yield updates `output` live.
    start_btn.click(
        fn=run_training,
        inputs=[],
        outputs=[output]
    )
    gr.Markdown("""
### π Notes
- Training will take several hours depending on GPU speed
- Checkpoints are saved every 200 steps to `./qwen-codeforces-cots/`
- You can download the final model after training completes
- The model will be compatible with the base Qwen2.5-0.5B-Instruct architecture
""")

# Launch only when executed directly (not when imported as a module).
if __name__ == "__main__":
    demo.launch()