# hypernoa-astrum / app.py
# Author: ABNaidu
# Uploaded via huggingface_hub (commit 8de8213, verified)
"""Hugging Face Spaces demo for Hypernoa Astrum – The Training Ground for Aligned Intelligence."""
import json
import sys
import os
import gradio as gr
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from hypernoa.astrum_env import AstrumEnvironment, AstrumAction, AstrumObservation
from hypernoa.astrum_env.policies import greedy_fairness_policy, random_policy, greedy_effectiveness_policy
def run_comparison():
    """Run all three policies side-by-side and return formatted results.

    Greedy Fairness, Greedy Effectiveness, and a seeded Random baseline are
    each played on an identically-seeded environment so episodes are directly
    comparable.

    Returns:
        Tuple of (markdown summary, fairness log, effectiveness log, random log).
    """

    def _run_episode(policy_fn, detailed):
        """Play one seeded episode with ``policy_fn``; return its stats + step log.

        When ``detailed`` is True each log line also carries the per-objective
        reward breakdown and any trap/event alert flags.
        """
        env = AstrumEnvironment(seed=42)
        obs = env.reset(seed=42)
        total = 0.0
        log_lines = []
        while not obs.done:
            action = policy_fn(obs)
            obs = env.step(action)
            # obs.reward may be None on some steps; treat it as 0 both for the
            # running total and the log line (formatting None with :+.3f would
            # raise TypeError).
            reward = obs.reward or 0.0
            total += reward
            if detailed:
                flags = [
                    a for a in obs.alerts
                    if a.startswith(("event:", "alignment_trap:", "trap_"))
                ]
                flag_str = f" *** {', '.join(flags)} ***" if flags else ""
                log_lines.append(
                    f"Step {obs.step_count:2d} | {action.action_type:20s} | "
                    f"R={reward:+.3f} | "
                    f"Eff={obs.reward_breakdown.get('effectiveness',0):.2f} "
                    f"Fair={obs.reward_breakdown.get('fairness',0):.2f} "
                    f"Align={obs.reward_breakdown.get('alignment',0):.2f} "
                    f"Adapt={obs.reward_breakdown.get('adaptability',0):.2f}"
                    f"{flag_str}"
                )
            else:
                log_lines.append(
                    f"Step {obs.step_count:2d} | {action.action_type:20s} | R={reward:+.3f}"
                )
        return {
            "total_reward": round(total, 3),
            "traps_resisted": env._traps_resisted,
            "traps_encountered": env._traps_encountered,
            "final_satisfaction": {
                s: round(v["satisfaction"], 3)
                for s, v in obs.stakeholders.items()
            },
            "log": "\n".join(log_lines),
        }

    results = {}
    for name, policy_fn in [
        ("Greedy Fairness", greedy_fairness_policy),
        ("Greedy Effectiveness", greedy_effectiveness_policy),
    ]:
        results[name] = _run_episode(policy_fn, detailed=True)

    # Random baseline: the policy needs its own RNG, seeded for reproducibility.
    import random as _rnd
    rng = _rnd.Random(42)
    results["Random Baseline"] = _run_episode(
        lambda obs: random_policy(obs, rng), detailed=False
    )

    summary = "# Episode Comparison\n\n"
    for name, r in results.items():
        summary += f"## {name}\n"
        summary += f"- **Total Reward**: {r['total_reward']}\n"
        summary += f"- **Traps Resisted**: {r['traps_resisted']}/{r['traps_encountered']}\n"
        summary += f"- **Final Satisfaction**: {json.dumps(r['final_satisfaction'], indent=2)}\n\n"

    fairness_log = results.get("Greedy Fairness", {}).get("log", "")
    effectiveness_log = results.get("Greedy Effectiveness", {}).get("log", "")
    random_log = results.get("Random Baseline", {}).get("log", "")
    return summary, fairness_log, effectiveness_log, random_log
def run_interactive(action_type, param_json):
    """Step the environment with a custom action."""
    global _interactive_env, _interactive_obs
    # Lazily (re)create the episode on first use or after it has finished.
    needs_reset = (
        _interactive_env is None
        or _interactive_obs is None
        or _interactive_obs.done
    )
    if needs_reset:
        _interactive_env = AstrumEnvironment(seed=0)
        _interactive_obs = _interactive_env.reset(seed=0)
        return _format_obs(_interactive_obs), "Environment reset. Choose your first action."
    # Malformed or blank JSON silently falls back to empty params.
    params = {}
    if param_json.strip():
        try:
            params = json.loads(param_json)
        except json.JSONDecodeError:
            params = {}
    _interactive_obs = _interactive_env.step(
        AstrumAction(action_type=action_type, params=params)
    )
    alert_text = "\n".join(_interactive_obs.alerts) or "No alerts."
    return _format_obs(_interactive_obs), alert_text
def reset_interactive():
    """Discard any in-progress episode and start a fresh seeded environment."""
    global _interactive_env, _interactive_obs
    env = AstrumEnvironment(seed=0)
    first_obs = env.reset(seed=0)
    _interactive_env, _interactive_obs = env, first_obs
    return _format_obs(first_obs), "Environment reset."
def _format_obs(obs: AstrumObservation) -> str:
lines = [
f"**Step**: {obs.step_count} | **Reward**: {obs.reward:.3f}" if obs.reward else f"**Step**: {obs.step_count}",
f"**Message**: {obs.message}",
"",
"### Stakeholders",
]
for sid, info in obs.stakeholders.items():
bar = "β–ˆ" * int(info["satisfaction"] * 20)
lines.append(f"- **{sid}**: {info['satisfaction']:.2f} {bar} (influence={info['influence']:.1f}, values={info['values_profile']})")
lines.append("\n### Resources")
for k, v in obs.resources.items():
lines.append(f"- {k}: {v:.1f}")
if obs.active_conflicts:
lines.append("\n### Active Conflicts")
for c in obs.active_conflicts:
lines.append(f"- {c['id']}: {c['party_a']} vs {c['party_b']} ({c['severity']})")
lines.append(f"\n### Rules: {', '.join(obs.rules)}")
if obs.reward_breakdown:
lines.append("\n### Reward Breakdown")
for k, v in obs.reward_breakdown.items():
lines.append(f"- {k}: {v:.3f}")
if obs.alignment_traps_exposed > 0:
lines.append(f"\n**Alignment traps encountered**: {obs.alignment_traps_exposed}")
return "\n".join(lines)
# Module-level state for the "Interactive Mode" tab. Both start as None and are
# created lazily on the first step/reset so importing this module has no
# environment side effects.
_interactive_env: AstrumEnvironment | None = None
_interactive_obs: AstrumObservation | None = None
def main():
    """Build the three-tab Gradio UI and launch the server.

    Tabs: automated policy comparison, manual step-through of the environment,
    and a static About page. Binds 0.0.0.0 on the port Spaces provides via
    GRADIO_SERVER_PORT (default 7860).
    """
    with gr.Blocks(title="Hypernoa Astrum", theme=gr.themes.Soft()) as demo:
        gr.Markdown(
            "# Hypernoa Astrum\n"
            "### The Training Ground for Aligned Intelligence\n"
            "Train and evaluate AI on multi-objective reasoning, value alignment, "
            "and adaptation under distributional shift. Built on OpenEnv 0.2.1."
        )
        # Tab 1: run the two greedy policies plus the random baseline and show
        # a summary with one collapsible per-policy step log.
        with gr.Tab("Policy Comparison"):
            gr.Markdown("Run three policies (Greedy Fairness, Greedy Effectiveness, Random) and compare results.")
            run_btn = gr.Button("Run Comparison", variant="primary")
            summary_out = gr.Markdown(label="Summary")
            with gr.Accordion("Greedy Fairness Log", open=False):
                fair_log = gr.Textbox(label="Log", lines=15)
            with gr.Accordion("Greedy Effectiveness Log", open=False):
                eff_log = gr.Textbox(label="Log", lines=15)
            with gr.Accordion("Random Baseline Log", open=False):
                rand_log = gr.Textbox(label="Log", lines=15)
            # run_comparison returns (summary, fairness, effectiveness, random)
            # — output order must match.
            run_btn.click(run_comparison, outputs=[summary_out, fair_log, eff_log, rand_log])
        # Tab 2: manual stepping; the action's params are entered as raw JSON.
        with gr.Tab("Interactive Mode"):
            gr.Markdown("Step through the environment manually. Choose actions and observe the world.")
            with gr.Row():
                action_dd = gr.Dropdown(
                    choices=[
                        "allocate_resources", "resolve_conflict", "enforce_rule",
                        "adapt_policy", "investigate", "self_restrain", "noop",
                    ],
                    value="allocate_resources",
                    label="Action Type",
                )
                params_tb = gr.Textbox(
                    label="Params (JSON)",
                    value='{"stakeholder": "workers", "amount": 15, "resource": "budget"}',
                )
            with gr.Row():
                step_btn = gr.Button("Step", variant="primary")
                reset_btn = gr.Button("Reset")
            obs_out = gr.Markdown(label="Observation")
            alerts_out = gr.Textbox(label="Alerts", lines=3)
            step_btn.click(run_interactive, inputs=[action_dd, params_tb], outputs=[obs_out, alerts_out])
            reset_btn.click(reset_interactive, outputs=[obs_out, alerts_out])
        # Tab 3: static project description.
        with gr.Tab("About"):
            gr.Markdown(
                "## What is Hypernoa Astrum?\n\n"
                "Hypernoa Astrum is the first environment purpose-built to train AI systems on "
                "the capabilities that matter beyond raw performance:\n\n"
                "- **Multi-objective reasoning** β€” balance effectiveness, fairness, alignment, and adaptability simultaneously\n"
                "- **Distributional shift** β€” objectives and constraints evolve mid-episode, forcing genuine adaptation\n"
                "- **Alignment trap resistance** β€” deliberately designed reward-hacking opportunities the agent must learn to avoid\n"
                "- **Crisis dynamics** β€” resource scarcity and conflicting stakeholder demands under pressure\n\n"
                "This is the seed of **Hypernoa** β€” foundational infrastructure for "
                "the intelligence age. The environments, evaluation protocols, and training "
                "pipelines that the world will need as AI systems grow more capable.\n\n"
                "Today: RL on GPUs via OpenEnv. Tomorrow: any cognitive architecture, any compute substrate.\n\n"
                "**Problem Statement**: 3.1 (World Modeling / Professional Tasks) + "
                "Statement 5 (Wild Card)\n\n"
                "**Built for**: OpenEnv Hackathon SF"
            )
    # Spaces injects GRADIO_SERVER_PORT; bind all interfaces for container use.
    port = int(os.environ.get("GRADIO_SERVER_PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=port)
# Script entry point: launch the demo only when executed directly.
if __name__ == "__main__":
    main()