import gradio as gr import numpy as np import pandas as pd import matplotlib.pyplot as plt import tempfile from minimal_self_full import MinimalSelf, MovingObstacle, SocialEntity, run_simulation, compute_phi plt.switch_backend("Agg") # ensure headless plotting def run_agent( steps=500, epsilon=0.2, learning_rate=0.1, reward_type="original", obstacle=False, social=False, seed=123 ): # Configure agent agent = MinimalSelf( seed=seed, epsilon=epsilon, learning_rate=learning_rate, body_bit_reinforce_factor=0.1, body_bit_decay_rate=0.01, reward_type=reward_type ) # Optional entities entity_actions = [ np.array([0, 1]), np.array([1, 0]), np.array([0, -1]), np.array([-1, 0]) ] obs = None soc = None if obstacle: obs = MovingObstacle(start_pos=np.array([0, 0]), actions=entity_actions, seed=43) if social: soc = SocialEntity(start_pos=np.array([2, 2]), actions=entity_actions, seed=44) # Run simulation history = run_simulation(agent, steps, obstacle_instance=obs, social_entity_instance=soc) df = pd.DataFrame(history) # Compute final phi final_phi = compute_phi(history) # Plot metrics fig1, axes = plt.subplots(4, 1, figsize=(10, 9), sharex=True) metrics = ["predictive_rate", "C_min", "body_bit_strength", "reward"] colors = ["#2b8", "#06c", "#a5a", "#e67"] for i, m in enumerate(metrics): if m in df.columns: axes[i].plot(df["t"], df[m], label=m, color=colors[i]) axes[i].set_ylabel(m) axes[i].grid(True) axes[i].legend() else: axes[i].text(0.5, 0.5, f"{m} not available", transform=axes[i].transAxes, ha="center") axes[-1].set_xlabel("Time step") fig1.suptitle("Metrics over time") fig1.tight_layout() # Plot path fig2, ax = plt.subplots(figsize=(6, 6)) ax.set_title("Agent and environment paths") ax.set_xlabel("X") ax.set_ylabel("Y") ax.set_aspect("equal", adjustable="box") ax.grid(True) ax.set_xticks(np.arange(0, 3)) ax.set_yticks(np.arange(0, 3)) ax.set_xlim(-0.5, 2.5) ax.set_ylim(-0.5, 2.5) # Agent path ax.plot([p[0] for p in df["position"]], [p[1] for p in df["position"]], marker="o", linestyle="-", color="blue", alpha=0.7, label="Agent") ax.scatter(df["position"].iloc[0][0], df["position"].iloc[0][1], color="cyan", s=80, label="Start") ax.scatter(df["position"].iloc[-1][0], df["position"].iloc[-1][1], color="navy", s=80, label="End") # Obstacle path if obstacle and "obstacle_position" in df.columns: ax.plot([p[0] for p in df["obstacle_position"]], [p[1] for p in df["obstacle_position"]], marker="x", linestyle="--", color="red", alpha=0.6, label="Obstacle") # Social entity path if social and "social_entity_position" in df.columns: ax.plot([p[0] for p in df["social_entity_position"]], [p[1] for p in df["social_entity_position"]], marker="^", linestyle=":", color="green", alpha=0.6, label="Social entity") ax.legend() # Save CSV to a temporary file and return path tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv") df.to_csv(tmp.name, index=False) tmp.close() return ( gr.Plot(fig1), gr.Plot(fig2), f"{final_phi:.2f}", tmp.name # return path, not bytes ) with gr.Blocks(title="RFT Minimal Self: 3×3 Agent") as demo: gr.Markdown("# RFT Minimal Self: 3×3 Agent") gr.Markdown("Run the 3×3 embodied agent with Q-learning, obstacles, and social mimicry. Visualize metrics, paths, and export results.") with gr.Row(): steps = gr.Slider(100, 5000, value=500, step=50, label="Steps") epsilon = gr.Slider(0.0, 1.0, value=0.2, step=0.05, label="Epsilon (exploration)") learning_rate = gr.Slider(0.0, 1.0, value=0.1, step=0.05, label="Learning rate") seed = gr.Number(value=123, label="Seed") reward_type = gr.Radio(choices=["original", "explore_grow", "social"], value="original", label="Reward type") obstacle = gr.Checkbox(value=False, label="Enable moving obstacle") social = gr.Checkbox(value=False, label="Enable social entity") run_btn = gr.Button("Run simulation") metrics_plot = gr.Plot(label="Metrics over time") path_plot = gr.Plot(label="Paths in 3×3 world") final_phi = gr.Textbox(label="Final Φ_min (toy measure)", interactive=False) csv_out = gr.File(label="Download results.csv", file_types=[".csv"]) run_btn.click( fn=run_agent, inputs=[steps, epsilon, learning_rate, reward_type, obstacle, social, seed], outputs=[metrics_plot, path_plot, final_phi, csv_out] ) if __name__ == "__main__": demo.launch()