Spaces:
Sleeping
Sleeping
File size: 4,855 Bytes
72a485a 1aa112f 72a485a 1aa112f 72a485a 1aa112f 72a485a 1aa112f 72a485a 1aa112f 72a485a 1aa112f 72a485a 1aa112f 72a485a 1aa112f 72a485a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
import gradio as gr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tempfile
from minimal_self_full import MinimalSelf, MovingObstacle, SocialEntity, run_simulation, compute_phi
# Select the non-interactive Agg backend at import time so figures can be
# rendered without a display (headless plotting).
plt.switch_backend("Agg")
def _draw_track(ax, points, **style):
    """Plot a sequence of (x, y) points on *ax* as one connected line."""
    ax.plot([p[0] for p in points], [p[1] for p in points], **style)


def _plot_metrics(df):
    """Return a figure with one stacked subplot per tracked metric over ``t``."""
    metrics = ["predictive_rate", "C_min", "body_bit_strength", "reward"]
    colors = ["#2b8", "#06c", "#a5a", "#e67"]
    fig, axes = plt.subplots(len(metrics), 1, figsize=(10, 9), sharex=True)
    try:
        for axis, metric, color in zip(axes, metrics, colors):
            if metric in df.columns:
                axis.plot(df["t"], df[metric], label=metric, color=color)
                axis.set_ylabel(metric)
                axis.grid(True)
                axis.legend()
            else:
                # Keep the subplot but say why it is empty.
                axis.text(
                    0.5, 0.5, f"{metric} not available",
                    transform=axis.transAxes, ha="center",
                )
        axes[-1].set_xlabel("Time step")
        fig.suptitle("Metrics over time")
        fig.tight_layout()
    finally:
        # Unregister the figure from pyplot's global state so repeated calls
        # do not accumulate open figures; the Figure object itself stays
        # valid and can still be rendered by the caller.
        plt.close(fig)
    return fig


def _plot_paths(df, obstacle, social):
    """Return a figure with the agent path and any optional entity paths."""
    fig, ax = plt.subplots(figsize=(6, 6))
    try:
        ax.set_title("Agent and environment paths")
        ax.set_xlabel("X")
        ax.set_ylabel("Y")
        ax.set_aspect("equal", adjustable="box")
        ax.grid(True)
        # Axes are fixed to the cells 0..2 with half a cell of padding.
        ax.set_xticks(np.arange(0, 3))
        ax.set_yticks(np.arange(0, 3))
        ax.set_xlim(-0.5, 2.5)
        ax.set_ylim(-0.5, 2.5)

        # An empty history has no "position" column and no first/last row.
        if "position" in df.columns and not df.empty:
            positions = df["position"]
            _draw_track(
                ax, positions,
                marker="o", linestyle="-", color="blue", alpha=0.7, label="Agent",
            )
            start, end = positions.iloc[0], positions.iloc[-1]
            ax.scatter(start[0], start[1], color="cyan", s=80, label="Start")
            ax.scatter(end[0], end[1], color="navy", s=80, label="End")

        if obstacle and "obstacle_position" in df.columns:
            _draw_track(
                ax, df["obstacle_position"],
                marker="x", linestyle="--", color="red", alpha=0.6, label="Obstacle",
            )

        if social and "social_entity_position" in df.columns:
            _draw_track(
                ax, df["social_entity_position"],
                marker="^", linestyle=":", color="green", alpha=0.6,
                label="Social entity",
            )

        ax.legend()
    finally:
        # See _plot_metrics: drop the pyplot reference, keep the Figure usable.
        plt.close(fig)
    return fig


def _write_csv(df):
    """Write *df* to a new temporary ``.csv`` file and return its path."""
    # Reserve the name and release the handle first: writing by name while the
    # handle is still open is not portable, and the handle would leak if
    # to_csv raised.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
        csv_path = tmp.name
    df.to_csv(csv_path, index=False)
    return csv_path


def run_agent(
    steps=500,
    epsilon=0.2,
    learning_rate=0.1,
    reward_type="original",
    obstacle=False,
    social=False,
    seed=123
):
    """Run one simulation and return plots, the final phi value and a CSV path.

    Args:
        steps: Number of steps handed to ``run_simulation``; coerced to int.
        epsilon: Forwarded to ``MinimalSelf`` (labelled "exploration" in the UI).
        learning_rate: Forwarded to ``MinimalSelf``.
        reward_type: Forwarded to ``MinimalSelf``.
        obstacle: When true, a ``MovingObstacle`` starting at (0, 0) is added.
        social: When true, a ``SocialEntity`` starting at (2, 2) is added.
        seed: Seed forwarded to ``MinimalSelf``; coerced to int unless ``None``.

    Returns:
        A 4-tuple ``(metrics_figure, paths_figure, phi_text, csv_path)`` where
        the figures are matplotlib figures, ``phi_text`` is the result of
        ``compute_phi`` formatted with two decimals, and ``csv_path`` is the
        path of a temporary CSV file holding the simulation history.
    """
    # UI widgets may deliver numeric values as floats (e.g. 123.0).
    steps = int(steps)
    seed = int(seed) if seed is not None else None

    # Configure agent
    agent = MinimalSelf(
        seed=seed,
        epsilon=epsilon,
        learning_rate=learning_rate,
        body_bit_reinforce_factor=0.1,
        body_bit_decay_rate=0.01,
        reward_type=reward_type
    )

    # Optional entities share the same four unit moves.
    entity_actions = [
        np.array([0, 1]), np.array([1, 0]), np.array([0, -1]), np.array([-1, 0])
    ]
    obs = (
        MovingObstacle(start_pos=np.array([0, 0]), actions=entity_actions, seed=43)
        if obstacle else None
    )
    soc = (
        SocialEntity(start_pos=np.array([2, 2]), actions=entity_actions, seed=44)
        if social else None
    )

    # Run simulation
    history = run_simulation(agent, steps, obstacle_instance=obs, social_entity_instance=soc)
    df = pd.DataFrame(history)
    final_phi = compute_phi(history)

    metrics_fig = _plot_metrics(df)
    paths_fig = _plot_paths(df, obstacle, social)
    csv_path = _write_csv(df)

    # Return the figures themselves; the gr.Plot outputs wired to this
    # handler accept a matplotlib figure as their value.
    return (
        metrics_fig,
        paths_fig,
        f"{final_phi:.2f}",
        csv_path  # return path, not bytes
    )
# Build the UI: input controls, a run button, and the four outputs that
# run_agent returns (metrics plot, path plot, phi text, CSV file).
with gr.Blocks(title="RFT Minimal Self: 3×3 Agent") as demo:
    gr.Markdown("# RFT Minimal Self: 3×3 Agent")
    gr.Markdown("Run the 3×3 embodied agent with Q-learning, obstacles, and social mimicry. Visualize metrics, paths, and export results.")

    # NOTE(review): the source's indentation was lost, so which controls sit
    # inside this row is a reconstruction — confirm the intended layout.
    with gr.Row():
        steps = gr.Slider(100, 5000, value=500, step=50, label="Steps")
        epsilon = gr.Slider(0.0, 1.0, value=0.2, step=0.05, label="Epsilon (exploration)")
        learning_rate = gr.Slider(0.0, 1.0, value=0.1, step=0.05, label="Learning rate")
        # precision=0 makes the component deliver an int instead of a float,
        # which is what a seed should be.
        seed = gr.Number(value=123, label="Seed", precision=0)

    reward_type = gr.Radio(choices=["original", "explore_grow", "social"], value="original", label="Reward type")
    obstacle = gr.Checkbox(value=False, label="Enable moving obstacle")
    social = gr.Checkbox(value=False, label="Enable social entity")
    run_btn = gr.Button("Run simulation")

    metrics_plot = gr.Plot(label="Metrics over time")
    path_plot = gr.Plot(label="Paths in 3×3 world")
    final_phi = gr.Textbox(label="Final Φ_min (toy measure)", interactive=False)
    csv_out = gr.File(label="Download results.csv", file_types=[".csv"])

    # Input order must match run_agent's positional parameter order.
    run_btn.click(
        fn=run_agent,
        inputs=[steps, epsilon, learning_rate, reward_type, obstacle, social, seed],
        outputs=[metrics_plot, path_plot, final_phi, csv_out]
    )

if __name__ == "__main__":
    demo.launch()
|