# hypothesis-engine / hypothesis_engine / openenv_wrapper.py
# AbhinavDubey30
# v2.1: OpenEnv 0.2.1 integration + training notebook + HF Spaces deployment
# 1911467
"""
OpenEnv Integration for Hypothesis Engine.
Wraps HypothesisEngine as an openenv-core Environment (v0.2.1)
for deployment on HuggingFace Spaces and use with TRL/Unsloth training.
Usage:
from hypothesis_engine.openenv_wrapper import create_hypothesis_app
app = create_hypothesis_app()
"""
import json
from typing import Any, Dict, List, Optional
from openenv.core import (
Action,
Environment,
Observation,
State,
create_app,
)
from pydantic import Field
from .env import HypothesisEngine
# ---------------------------------------------------------------------------
# Custom types for OpenEnv
# ---------------------------------------------------------------------------
class HypothesisAction(Action):
    """Action for the Hypothesis Engine environment."""

    # Only `action` is required. Every other field defaults to None and is
    # forwarded to the engine only when the caller supplies it.

    # Selects which engine operation to run.
    action: str = Field(
        ...,
        description=(
            "Action type: 'experiment', 'hypothesize', 'predict', "
            "'get_status', or 'get_hint'"
        ),
    )

    # Payload for the 'experiment' action.
    inputs: Optional[Dict[str, float]] = Field(
        None,
        description="Input values for experiment action, e.g. {'x': 3.0}",
    )
    mode: Optional[str] = Field(
        None,
        description="Experiment mode: 'observe' or 'intervene' (causal worlds only)",
    )

    # Payload for the 'hypothesize' action.
    expression: Optional[str] = Field(
        None,
        description="Mathematical expression for hypothesize action, e.g. '2*x + 3'",
    )

    # Payload for the 'predict' action.
    predictions: Optional[List[float]] = Field(
        None,
        description="List of predicted values for predict action",
    )
class HypothesisObservation(Observation):
    """Observation returned by the Hypothesis Engine environment."""

    # Every field has a default, so an observation can be built from a
    # partial set of values (the "not started" reply in step() does this).

    # Rendered, human/LLM-readable view of the raw engine observation.
    text: str = Field(
        "",
        description="Natural language observation for LLM agents",
    )
    phase: str = Field(
        "not_started",
        description="Current episode phase: exploration, prediction, or done",
    )

    # Identity of the world being explored.
    world_name: str = Field("", description="Name of the current world")
    world_type: str = Field("", description="Type of the current world")

    # Experiment budget bookkeeping.
    experiments_remaining: int = Field(
        0,
        description="Experiments remaining in budget",
    )
    experiments_used: int = Field(
        0,
        description="Experiments used so far",
    )

    action_space: str = Field(
        "",
        description="Description of available actions",
    )
class HypothesisState(State):
    """Internal state of the Hypothesis Engine environment."""

    # All fields carry defaults, so `HypothesisState()` is a valid
    # "nothing started yet" snapshot.

    # World / episode identity.
    difficulty: int = Field(1, description="Current difficulty level")
    world_name: str = Field("", description="Current world name")
    world_type: str = Field("", description="Current world type")
    phase: str = Field("not_started", description="Episode phase")

    # Experiment budget bookkeeping.
    experiments_used: int = Field(0, description="Experiments used")
    experiments_remaining: int = Field(0, description="Experiments left")

    # Hypothesis progress.
    hypothesis_count: int = Field(0, description="Hypotheses submitted")
    best_hypothesis_score: float = Field(
        0.0,
        description="Best hypothesis score so far",
    )
# ---------------------------------------------------------------------------
# Observation formatter -- turns raw dict into LLM-friendly text
# ---------------------------------------------------------------------------
def _format_observation_text(raw_obs: Dict[str, Any], action_desc: str) -> str:
"""Convert a raw HypothesisEngine observation dict into a natural-language string."""
parts = []
# Message
if raw_obs.get("message"):
parts.append(raw_obs["message"])
# World info
world = raw_obs.get("world", {})
if world:
parts.append(
f"\n-- World: {world.get('world_name', '?')} "
f"(type: {world.get('world_type', '?')}, "
f"difficulty: {world.get('difficulty', '?')})"
)
parts.append(f" Description: {world.get('description', '')}")
parts.append(f" Variables: {world.get('variables', [])}")
if world.get('causal_mode'):
parts.append(
f" Causal Mode: This world supports observe AND intervene experiments."
)
# Budget
if "experiments_remaining" in raw_obs:
parts.append(
f"\n-- Budget: {raw_obs['experiments_remaining']} experiments remaining "
f"(used {raw_obs.get('experiments_used', 0)})"
)
# Last experiment result
if raw_obs.get("last_experiment_result"):
r = raw_obs["last_experiment_result"]
parts.append(f"\n-- Last Experiment: inputs={r.get('inputs')}, output={r.get('output')}")
if r.get("mode"):
parts.append(f" Mode: {r['mode']}")
# Hypothesis feedback
if raw_obs.get("hypothesis_feedback"):
hf = raw_obs["hypothesis_feedback"]
parts.append(f"\n-- Hypothesis Feedback: {hf.get('quality', '')}")
# Recent experiment history (last 5)
hist = raw_obs.get("experiment_history", [])
if hist:
parts.append(f"\n-- Recent Experiments ({len(hist)} shown):")
for i, exp in enumerate(hist[-5:], 1):
parts.append(f" {i}. inputs={exp.get('inputs')} -> output={exp.get('output')}")
# Test cases
tests = raw_obs.get("test_cases", [])
if tests:
parts.append(f"\n-- Test Cases to Predict ({len(tests)} total):")
for i, tc in enumerate(tests[:5], 1):
parts.append(f" {i}. {tc}")
if len(tests) > 5:
parts.append(f" ... and {len(tests) - 5} more")
# Final results
if raw_obs.get("final_results"):
fr = raw_obs["final_results"]
rb = fr.get("reward_breakdown", {})
parts.append(f"\n-- FINAL RESULTS --")
parts.append(f" Total Reward: {rb.get('total_reward', 0):.1f}/100")
parts.append(f" Ground Truth: {fr.get('ground_truth', '?')}")
parts.append(f" Passed: {fr.get('passed', False)}")
# Action space
parts.append(f"\n-- Available Actions --\n{action_desc}")
return "\n".join(parts)
# ---------------------------------------------------------------------------
# OpenEnv Environment
# ---------------------------------------------------------------------------
class HypothesisEngineOpenEnv(
    Environment[HypothesisAction, HypothesisObservation, HypothesisState]
):
    """Adapter that exposes HypothesisEngine through the openenv-core Environment API.

    The wrapper stores the engine configuration, builds a HypothesisEngine on
    ``reset()``, translates ``HypothesisAction`` objects into the action dicts
    the engine consumes, and turns the engine's raw observation dicts into
    ``HypothesisObservation`` instances with a rendered text view.
    """

    SUPPORTS_CONCURRENT_SESSIONS = True

    def __init__(
        self,
        difficulty: int = 1,
        experiment_budget: int = 30,
        auto_curriculum: bool = True,
        use_self_play: bool = False,
        **kwargs,
    ):
        """Record engine settings; the engine itself is not built until reset().

        Args:
            difficulty: Forwarded to HypothesisEngine.
            experiment_budget: Forwarded to HypothesisEngine.
            auto_curriculum: Forwarded to HypothesisEngine.
            use_self_play: Forwarded to HypothesisEngine.
            **kwargs: Passed through to the Environment base initializer.
        """
        super().__init__(**kwargs)

        # Engine configuration, replayed each time an engine is constructed.
        self.difficulty = difficulty
        self.experiment_budget = experiment_budget
        self.auto_curriculum = auto_curriculum
        self.use_self_play = use_self_play

        # Per-episode runtime state.
        self._env: Optional[HypothesisEngine] = None
        self._last_raw_obs: Dict[str, Any] = {}
        self._step_count = 0

    def _ensure_env(self, seed: Optional[int] = None) -> HypothesisEngine:
        """Build and return a fresh HypothesisEngine from the stored settings.

        Despite the name, this never reuses an existing engine; every call
        constructs a new one.
        """
        engine = HypothesisEngine(
            difficulty=self.difficulty,
            experiment_budget=self.experiment_budget,
            seed=seed,
            auto_curriculum=self.auto_curriculum,
            use_self_play=self.use_self_play,
        )
        return engine

    def reset(
        self,
        seed: Optional[int] = None,
        episode_id: Optional[str] = None,
        **kwargs,
    ) -> HypothesisObservation:
        """Start a new episode and return its first observation.

        Args:
            seed: Passed both to the engine constructor and to its ``reset``.
            episode_id: Echoed back in the observation metadata ("" if None).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            The initial observation with ``done=False`` and ``reward=None``,
            after ``_apply_transform``.
        """
        # NOTE(review): a brand-new engine is created on every reset, so any
        # state the engine might keep across episodes does not carry over.
        # Confirm this is intended when auto_curriculum is enabled.
        engine = self._ensure_env(seed=seed)
        self._env = engine
        self._step_count = 0

        raw = engine.reset(seed=seed)
        self._last_raw_obs = raw

        actions_text = engine.get_action_space_description()
        rendered = _format_observation_text(raw, actions_text)
        world_info = raw.get("world", {})

        observation = HypothesisObservation(
            text=rendered,
            done=False,
            reward=None,
            phase=raw.get("phase", "exploration"),
            world_name=world_info.get("world_name", ""),
            world_type=world_info.get("world_type", ""),
            experiments_remaining=raw.get("experiments_remaining", 0),
            experiments_used=raw.get("experiments_used", 0),
            action_space=actions_text,
            metadata={"episode_id": episode_id or "", "raw": raw},
        )
        return self._apply_transform(observation)

    def step(
        self,
        action: HypothesisAction,
        timeout_s: Optional[float] = None,
        **kwargs,
    ) -> HypothesisObservation:
        """Apply one action to the engine and return the resulting observation.

        Args:
            action: The action to perform; optional fields left as None are
                omitted from the dict handed to the engine.
            timeout_s: Accepted for interface compatibility; unused here.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            The observation for this step after ``_apply_transform``. If
            ``reset()`` has not been called yet, a "not started" observation
            with ``reward=-1.0`` is returned directly, without the transform.
        """
        engine = self._env
        if engine is None:
            return HypothesisObservation(
                text="Environment not started. Call reset() first.",
                done=False,
                reward=-1.0,
                phase="not_started",
            )

        # Translate the typed action into the engine's dict form, keeping
        # only the optional fields that were actually provided.
        payload: Dict[str, Any] = {"action": action.action}
        for field_name in ("inputs", "mode", "expression", "predictions"):
            value = getattr(action, field_name)
            if value is not None:
                payload[field_name] = value

        raw, reward, done, info = engine.step(payload)
        self._last_raw_obs = raw
        self._step_count += 1

        actions_text = engine.get_action_space_description()
        rendered = _format_observation_text(raw, actions_text)
        world_info = raw.get("world", {})

        observation = HypothesisObservation(
            text=rendered,
            done=done,
            reward=reward,
            phase=raw.get("phase", ""),
            world_name=world_info.get("world_name", ""),
            world_type=world_info.get("world_type", ""),
            experiments_remaining=raw.get("experiments_remaining", 0),
            experiments_used=raw.get("experiments_used", 0),
            # No further actions are advertised once the episode has ended.
            action_space="" if done else actions_text,
            metadata={"info": info, "raw": raw},
        )
        return self._apply_transform(observation)

    @property
    def state(self) -> HypothesisState:
        """Snapshot of the current episode, built from the engine's summary.

        Returns an all-defaults ``HypothesisState`` when no engine exists yet.
        """
        engine = self._env
        if engine is None:
            return HypothesisState()

        summary = engine.get_episode_summary()
        return HypothesisState(
            step_count=self._step_count,
            difficulty=summary.get("difficulty", self.difficulty),
            world_name=summary.get("world_name", ""),
            world_type=summary.get("world_type", ""),
            phase=summary.get("phase", "not_started"),
            experiments_used=summary.get("experiments_used", 0),
            experiments_remaining=summary.get("experiments_remaining", 0),
            # The engine summary calls this "hypotheses_submitted".
            hypothesis_count=summary.get("hypotheses_submitted", 0),
            best_hypothesis_score=summary.get("best_hypothesis_score", 0.0),
        )

    def get_metadata(self):
        """Describe this environment as an ``EnvironmentMetadata`` record."""
        # Imported here rather than at module top, as in the original code.
        from openenv.core.env_server.types import EnvironmentMetadata

        summary_text = (
            "A procedurally-generated RL environment for training LLMs on "
            "scientific reasoning through causal discovery, physics simulation, "
            "state machine reverse-engineering, and adversarial self-play."
        )
        # NOTE(review): version is "2.0.0" while the file header mentions
        # v2.1 -- confirm which one is current.
        return EnvironmentMetadata(
            name="HypothesisEngine",
            description=summary_text,
            version="2.0.0",
            author="AbhinavDubey30",
            documentation_url="https://github.com/AbhinavDubey30/OpenMax",
        )

    def close(self) -> None:
        """Drop the reference to the wrapped engine."""
        self._env = None
# ---------------------------------------------------------------------------
# App factory for HuggingFace Spaces / local server
# ---------------------------------------------------------------------------
def create_hypothesis_app(
    difficulty: int = 1,
    experiment_budget: int = 30,
    auto_curriculum: bool = True,
    use_self_play: bool = False,
    max_concurrent_envs: int = 5,
):
    """
    Build the FastAPI app that serves HypothesisEngine (e.g. on HF Spaces).

    The first four arguments are forwarded to every ``HypothesisEngineOpenEnv``
    the app constructs; ``max_concurrent_envs`` is handed to ``create_app``.

    Usage:
        # In app.py for HF Spaces:
        from hypothesis_engine.openenv_wrapper import create_hypothesis_app
        app = create_hypothesis_app()

        # Or run locally:
        # uvicorn hypothesis_engine.openenv_wrapper:app --reload
    """
    # Captured once; every environment built by the factory shares these.
    engine_options = {
        "difficulty": difficulty,
        "experiment_budget": experiment_budget,
        "auto_curriculum": auto_curriculum,
        "use_self_play": use_self_play,
    }

    def env_factory():
        # Zero-argument callable handed to create_app to build environments.
        return HypothesisEngineOpenEnv(**engine_options)

    return create_app(
        env=env_factory,
        action_cls=HypothesisAction,
        observation_cls=HypothesisObservation,
        env_name="HypothesisEngine",
        max_concurrent_envs=max_concurrent_envs,
    )
# Default app instance for uvicorn / HF Spaces.
# Built at import time with create_hypothesis_app()'s default arguments so a
# module-level `app` exists for `uvicorn hypothesis_engine.openenv_wrapper:app`.
app = create_hypothesis_app()