Spaces:

iteratehack
/

MentorFlow

Paused

MentorFlow / app.py

Cornelius

Deploy MentorFlow with GPU support

a52f96d 15 days ago

6.57 kB

	"""
	Gradio app for MentorFlow - Teacher-Student RL System
	Deployed on Hugging Face Spaces with GPU support
	"""

	import gradio as gr
	import sys
	import os
	from pathlib import Path

	# Put the app directory and its two agent sub-directories on the import path.
	# Every entry is pushed to the front, so the last one listed is searched first.
	_app_dir = Path(__file__).parent
	for _entry in (_app_dir, _app_dir / "teacher_agent_dev", _app_dir / "student_agent_dev"):
	    sys.path.insert(0, str(_entry))

	def run_comparison(iterations: int, seed: int, use_deterministic: bool, device: str, progress=gr.Progress()):
	    """
	    Run the strategy comparison script in a subprocess and report its result.

	    Args:
	        iterations: Number of training iterations, passed as ``--iterations``.
	        seed: Random seed, passed as ``--seed`` (ignored if deterministic).
	        use_deterministic: If true, pass ``--deterministic`` instead of a seed.
	        device: ``"cuda"`` to request the GPU; any other value selects the CPU.
	        progress: Gradio progress tracker.

	    Returns:
	        A ``(message, plot_path)`` tuple. ``plot_path`` is the comparison plot's
	        path as a string when the script succeeded and the plot file exists,
	        otherwise ``None``. Failures are reported in ``message`` rather than
	        raised.
	    """
	    import subprocess
	    import traceback

	    # Values coming from UI number inputs may be floats or missing (None);
	    # normalise them up front so the script always receives plain integers.
	    try:
	        iteration_count = int(iterations)
	        seed_value = None if use_deterministic else int(seed)
	    except (TypeError, ValueError, OverflowError) as exc:
	        return f"❌ Error: invalid numeric input ({exc})", None

	    # Resolve the device. If CUDA was requested but is not available, really
	    # fall back to the CPU (and say so) instead of aborting the run.
	    notice = ""
	    target_device = "cuda" if device == "cuda" else "cpu"
	    if target_device == "cuda":
	        try:
	            import torch
	            cuda_ok = torch.cuda.is_available()
	        except Exception:
	            # Best effort: if the probe itself fails, keep the requested
	            # device and leave the decision to the child process.
	            cuda_ok = True
	        if not cuda_ok:
	            notice = "⚠️ GPU requested but not available. Using CPU instead.\n\n"
	            target_device = "cpu"

	    # The child process inherits this variable from the current environment.
	    os.environ["CUDA_DEVICE"] = target_device

	    app_dir = Path(__file__).parent

	    # Prepare command
	    cmd = [
	        sys.executable,
	        "teacher_agent_dev/compare_strategies.py",
	        "--iterations", str(iteration_count),
	    ]
	    if use_deterministic:
	        cmd.append("--deterministic")
	    else:
	        cmd.extend(["--seed", str(seed_value)])

	    try:
	        progress(0.1, desc="Starting comparison...")

	        result = subprocess.run(
	            cmd,
	            cwd=str(app_dir),
	            capture_output=True,
	            text=True,
	            timeout=3600,  # 1 hour timeout
	        )

	        stdout_text = result.stdout
	        stderr_text = result.stderr

	        # Combine outputs
	        full_output = f"=== STDOUT ===\n{stdout_text}\n\n=== STDERR ===\n{stderr_text}"

	        progress(0.9, desc="Processing results...")

	        if result.returncode != 0:
	            return f"{notice}❌ Error occurred:\n{full_output}", None

	        # Find output plot
	        plot_path = app_dir / "teacher_agent_dev" / "comparison_all_strategies.png"
	        if not plot_path.exists():
	            return f"{notice}⚠️ Plot not found, but output:\n\n{full_output}", None

	        progress(1.0, desc="Complete!")
	        return f"{notice}✅ Comparison complete!\n\n{stdout_text}", str(plot_path)

	    except subprocess.TimeoutExpired:
	        return f"{notice}❌ Timeout: Comparison took longer than 1 hour", None
	    except Exception as e:
	        # Report unexpected failures in the UI instead of raising.
	        return f"{notice}❌ Error: {str(e)}\n\n{traceback.format_exc()}", None


	def check_gpu() -> str:
	    """
	    Check if GPU is available.

	    Returns:
	        A status line: the name of CUDA device 0 when CUDA is available, a
	        "no GPU" notice when it is not, or a "could not check" notice when
	        the probe fails (for example because torch cannot be imported).
	    """
	    try:
	        import torch

	        if torch.cuda.is_available():
	            return f"✅ GPU Available: {torch.cuda.get_device_name(0)}"
	        return "⚠️ No GPU available, using CPU"
	    except Exception:
	        # Only ordinary errors are reported as "unknown"; KeyboardInterrupt
	        # and SystemExit are allowed to propagate.
	        return "⚠️ Could not check GPU status"


	# Create Gradio interface
	# Create Gradio interface: intro text, GPU status, run parameters, outputs,
	# and a short guide to reading the results. The Blocks object is bound to
	# `demo`.
	# NOTE(review): indentation was stripped from this copy of the file, so the
	# exact nesting of the Row/Column contexts below cannot be read off the text;
	# confirm against the deployed app.py.
	with gr.Blocks(title="MentorFlow - Strategy Comparison") as demo:
	gr.Markdown("""
	# 🎓 MentorFlow - Teacher-Student RL System

	Compare three training strategies using LM Student (DistilBERT):
	1. Random Strategy: Random questions until student can pass difficult questions
	2. Progressive Strategy: Easy → Medium → Hard within each family
	3. Teacher Strategy: RL teacher agent learns optimal curriculum

	## Usage

	1. Set parameters below
	2. Click "Run Comparison" to start training
	3. View results and generated plots

	Note: With LM Student, this will take 15-30 minutes for 500 iterations.
	""")

	# GPU Status: a read-only textbox whose initial value is the result of
	# calling check_gpu() once while the UI is being built.
	with gr.Row():
	gpu_status = gr.Textbox(label="GPU Status", value=check_gpu(), interactive=False)
	refresh_btn = gr.Button("🔄 Refresh GPU Status")

	# Clicking the refresh button re-runs check_gpu and writes its return value
	# into the status textbox.
	refresh_btn.click(fn=check_gpu, outputs=gpu_status)

	# Parameters: the four widgets below are the inputs of run_comparison
	# (see run_btn.click further down).
	with gr.Row():
	with gr.Column():
	# Iteration count: 50 to 500 in steps of 50, default 100.
	iterations = gr.Slider(
	minimum=50,
	maximum=500,
	value=100,
	step=50,
	label="Iterations",
	info="Number of training iterations (higher = longer runtime)"
	)

	# Seed value; only forwarded to the script when deterministic mode is off.
	seed = gr.Number(
	value=42,
	label="Random Seed",
	info="Seed for reproducibility (ignored if deterministic)"
	)

	# Checked by default, so by default run_comparison passes --deterministic
	# instead of the seed above.
	use_deterministic = gr.Checkbox(
	value=True,
	label="Deterministic Mode",
	info="Use fixed seed=42 for reproducible results"
	)

	# Device choice; "cuda" is preselected.
	device = gr.Radio(
	choices=["cuda", "cpu"],
	value="cuda",
	label="Device",
	info="Use GPU (cuda) if available, CPU otherwise"
	)

	with gr.Column():
	run_btn = gr.Button("🚀 Run Comparison", variant="primary", size="lg")

	# Output: a read-only text area for the message and an image component that
	# is given the plot as a file path.
	with gr.Row():
	with gr.Column(scale=1):
	output_text = gr.Textbox(
	label="Output",
	lines=15,
	max_lines=30,
	interactive=False
	)

	with gr.Column(scale=1):
	output_plot = gr.Image(
	label="Comparison Plot",
	type="filepath",
	height=500
	)

	# Run comparison: the inputs are listed in the same order as
	# run_comparison's positional parameters, and its two return values go to
	# the text area and the image respectively.
	run_btn.click(
	fn=run_comparison,
	inputs=[iterations, seed, use_deterministic, device],
	outputs=[output_text, output_plot]
	)

	gr.Markdown("""
	## 📊 Understanding Results

	The comparison plot shows:
	- Learning Curves: How each strategy improves over time
	- Difficult Question Performance: Accuracy on hard questions
	- Curriculum Diversity: Topic coverage over time
	- Learning Efficiency: Iterations to reach target vs final performance

	The Teacher Strategy should ideally outperform Random and Progressive strategies.
	""")

	if __name__ == "__main__":
	    # Start the server only when run as a script: listen on all interfaces
	    # at port 7860, with no public share link.
	    launch_options = {
	        "share": False,
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	    }
	    demo.launch(**launch_options)