# Spaces: Paused
"""
Gradio app for MentorFlow - Teacher-Student RL System
Deployed on Hugging Face Spaces with GPU support
"""
import os
import subprocess
import sys
import traceback
from pathlib import Path

import gradio as gr
# Add project paths.
# Prepend all three directories in one step. The resulting search order is
# student_agent_dev, teacher_agent_dev, then the app directory itself, which is
# what three successive insert(0, ...) calls in the reverse order produce.
sys.path[:0] = [
    str(Path(__file__).parent / "student_agent_dev"),
    str(Path(__file__).parent / "teacher_agent_dev"),
    str(Path(__file__).parent),
]
def _cuda_known_unavailable() -> bool:
    """Return True only when torch imports cleanly and reports that CUDA is absent."""
    try:
        import torch  # imported lazily so a missing torch does not break the app

        return not torch.cuda.is_available()
    except Exception:
        # torch is missing or its CUDA probe failed: availability is unknown,
        # so keep the requested device instead of guessing.
        return False


def _build_comparison_command(iterations, seed, use_deterministic) -> list:
    """Build the argv list for compare_strategies.py from the UI values."""
    # Coerce through int() first: a UI widget may hand back a float such as
    # 100.0, and str(100.0) would put "100.0" on the command line.
    cmd = [
        sys.executable,
        "teacher_agent_dev/compare_strategies.py",
        "--iterations", str(int(iterations)),
    ]
    if use_deterministic:
        cmd.append("--deterministic")
    else:
        cmd.extend(["--seed", str(int(seed))])
    return cmd


def run_comparison(iterations: int, seed: int, use_deterministic: bool, device: str, progress=gr.Progress()):
    """
    Run strategy comparison with LM Student.

    Launches teacher_agent_dev/compare_strategies.py in a subprocess (working
    directory: this file's directory) and reports its output.

    Args:
        iterations: Number of training iterations
        seed: Random seed (ignored if deterministic)
        use_deterministic: Pass --deterministic to the script instead of --seed
        device: 'cpu' or 'cuda' (GPU)
        progress: Gradio progress tracker

    Returns:
        A (message, plot_path) tuple. plot_path is the path of
        teacher_agent_dev/comparison_all_strategies.png as a string when the
        run succeeded and that file exists, otherwise None.
    """
    # If CUDA was requested but torch reports none, really fall back to CPU
    # (the warning text promises this) and carry the warning into the result.
    notice = ""
    if device == "cuda" and _cuda_known_unavailable():
        device = "cpu"
        notice = "⚠️ GPU requested but not available. Using CPU instead.\n\n"

    # The subprocess inherits this process's environment.
    # NOTE(review): presumably compare_strategies.py reads CUDA_DEVICE — confirm.
    os.environ["CUDA_DEVICE"] = "cuda" if device == "cuda" else "cpu"

    # Prepare command; reject unusable numeric inputs (e.g. an emptied seed
    # field arriving as None) with a message instead of an unhandled exception.
    try:
        cmd = _build_comparison_command(iterations, seed, use_deterministic)
    except (TypeError, ValueError) as e:
        return f"{notice}❌ Error: invalid iterations/seed value: {e}", None

    app_dir = Path(__file__).parent

    try:
        progress(0.1, desc="Starting comparison...")

        result = subprocess.run(
            cmd,
            cwd=str(app_dir),
            capture_output=True,
            text=True,
            timeout=3600,  # 1 hour timeout
        )

        stdout_text = result.stdout
        stderr_text = result.stderr

        # Combine outputs
        full_output = f"=== STDOUT ===\n{stdout_text}\n\n=== STDERR ===\n{stderr_text}"

        progress(0.9, desc="Processing results...")

        if result.returncode != 0:
            return f"{notice}❌ Error occurred:\n{full_output}", None

        # Find output plot.
        # NOTE(review): only existence is checked, so a plot left over from an
        # earlier run would be shown if this run did not rewrite it — confirm
        # the script always regenerates the file.
        plot_path = app_dir / "teacher_agent_dev" / "comparison_all_strategies.png"
        if plot_path.exists():
            progress(1.0, desc="Complete!")
            return f"{notice}✅ Comparison complete!\n\n{stdout_text}", str(plot_path)
        return f"{notice}⚠️ Plot not found, but output:\n\n{full_output}", None

    except subprocess.TimeoutExpired:
        return f"{notice}❌ Timeout: Comparison took longer than 1 hour", None
    except Exception as e:
        # Top-level boundary for the UI: show the failure rather than crash.
        return f"{notice}❌ Error: {str(e)}\n\n{traceback.format_exc()}", None
def check_gpu():
    """
    Check if GPU is available.

    Returns:
        A status string: the CUDA device name when torch reports a GPU, a
        "no GPU" notice when it does not, or a "could not check" notice when
        torch cannot be imported or the query fails.
    """
    try:
        import torch  # imported lazily so a missing torch only degrades the status text

        if torch.cuda.is_available():
            return f"✅ GPU Available: {torch.cuda.get_device_name(0)}"
        return "⚠️ No GPU available, using CPU"
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        return "⚠️ Could not check GPU status"
# Static Markdown shown above and below the controls. Kept at column 0 so the
# rendered text does not depend on the indentation of the `with` block below.
# NOTE(review): the arrows and emoji were mis-decoded in the reviewed copy.
# The "→" arrows are certain; the emoji glyphs here and on the two buttons are
# a best guess — confirm against the deployed app.
_INTRO_MARKDOWN = """
# 🎓 MentorFlow - Teacher-Student RL System

Compare three training strategies using LM Student (DistilBERT):

1. **Random Strategy**: Random questions until student can pass difficult questions
2. **Progressive Strategy**: Easy → Medium → Hard within each family
3. **Teacher Strategy**: RL teacher agent learns optimal curriculum

## Usage

1. Set parameters below
2. Click "Run Comparison" to start training
3. View results and generated plots

**Note**: With LM Student, this will take 15-30 minutes for 500 iterations.
"""

_RESULTS_MARKDOWN = """
## 📊 Understanding Results

The comparison plot shows:

- **Learning Curves**: How each strategy improves over time
- **Difficult Question Performance**: Accuracy on hard questions
- **Curriculum Diversity**: Topic coverage over time
- **Learning Efficiency**: Iterations to reach target vs final performance

The **Teacher Strategy** should ideally outperform Random and Progressive strategies.
"""

# Create Gradio interface
with gr.Blocks(title="MentorFlow - Strategy Comparison") as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    # GPU Status: probed once when the UI is built, refreshable on demand.
    with gr.Row():
        gpu_status = gr.Textbox(label="GPU Status", value=check_gpu(), interactive=False)
        refresh_btn = gr.Button("🔄 Refresh GPU Status")
    refresh_btn.click(fn=check_gpu, outputs=gpu_status)

    # Parameters
    with gr.Row():
        with gr.Column():
            iterations = gr.Slider(
                minimum=50,
                maximum=500,
                value=100,
                step=50,
                label="Iterations",
                info="Number of training iterations (higher = longer runtime)",
            )
            seed = gr.Number(
                value=42,
                label="Random Seed",
                info="Seed for reproducibility (ignored if deterministic)",
            )
            use_deterministic = gr.Checkbox(
                value=True,
                label="Deterministic Mode",
                info="Use fixed seed=42 for reproducible results",
            )
            device = gr.Radio(
                choices=["cuda", "cpu"],
                value="cuda",
                label="Device",
                info="Use GPU (cuda) if available, CPU otherwise",
            )
        with gr.Column():
            run_btn = gr.Button("🚀 Run Comparison", variant="primary", size="lg")

    # Output: script text on the left, generated plot on the right.
    with gr.Row():
        with gr.Column(scale=1):
            output_text = gr.Textbox(
                label="Output",
                lines=15,
                max_lines=30,
                interactive=False,
            )
        with gr.Column(scale=1):
            output_plot = gr.Image(
                label="Comparison Plot",
                type="filepath",
                height=500,
            )

    # Run comparison: input order must match run_comparison's positional
    # parameters; its (message, plot_path) result fills the two outputs.
    run_btn.click(
        fn=run_comparison,
        inputs=[iterations, seed, use_deterministic, device],
        outputs=[output_text, output_plot],
    )

    gr.Markdown(_RESULTS_MARKDOWN)
if __name__ == "__main__":
    # Enable the request queue explicitly: run_comparison reports progress via
    # gr.Progress and may block for up to an hour, and older Gradio releases
    # only support progress updates and long-running requests when queued.
    demo.queue()
    # Listen on all interfaces on port 7860; no public share link.
    demo.launch(share=False, server_name="0.0.0.0", server_port=7860)