""" OpenEnv Integration for Hypothesis Engine. Wraps HypothesisEngine as an openenv-core Environment (v0.2.1) for deployment on HuggingFace Spaces and use with TRL/Unsloth training. Usage: from hypothesis_engine.openenv_wrapper import create_hypothesis_app app = create_hypothesis_app() """ import json from typing import Any, Dict, List, Optional from openenv.core import ( Action, Environment, Observation, State, create_app, ) from pydantic import Field from .env import HypothesisEngine # --------------------------------------------------------------------------- # Custom types for OpenEnv # --------------------------------------------------------------------------- class HypothesisAction(Action): """Action for the Hypothesis Engine environment.""" action: str = Field( description=( "Action type: 'experiment', 'hypothesize', 'predict', " "'get_status', or 'get_hint'" ) ) inputs: Optional[Dict[str, float]] = Field( default=None, description="Input values for experiment action, e.g. {'x': 3.0}", ) mode: Optional[str] = Field( default=None, description="Experiment mode: 'observe' or 'intervene' (causal worlds only)", ) expression: Optional[str] = Field( default=None, description="Mathematical expression for hypothesize action, e.g. '2*x + 3'", ) predictions: Optional[List[float]] = Field( default=None, description="List of predicted values for predict action", ) class HypothesisObservation(Observation): """Observation returned by the Hypothesis Engine environment.""" text: str = Field( default="", description="Natural language observation for LLM agents", ) phase: str = Field( default="not_started", description="Current episode phase: exploration, prediction, or done", ) world_name: str = Field(default="", description="Name of the current world") world_type: str = Field(default="", description="Type of the current world") experiments_remaining: int = Field( default=0, description="Experiments remaining in budget" ) experiments_used: int = Field( default=0, description="Experiments used so far" ) action_space: str = Field( default="", description="Description of available actions" ) class HypothesisState(State): """Internal state of the Hypothesis Engine environment.""" difficulty: int = Field(default=1, description="Current difficulty level") world_name: str = Field(default="", description="Current world name") world_type: str = Field(default="", description="Current world type") phase: str = Field(default="not_started", description="Episode phase") experiments_used: int = Field(default=0, description="Experiments used") experiments_remaining: int = Field(default=0, description="Experiments left") hypothesis_count: int = Field(default=0, description="Hypotheses submitted") best_hypothesis_score: float = Field( default=0.0, description="Best hypothesis score so far" ) # --------------------------------------------------------------------------- # Observation formatter -- turns raw dict into LLM-friendly text # --------------------------------------------------------------------------- def _format_observation_text(raw_obs: Dict[str, Any], action_desc: str) -> str: """Convert a raw HypothesisEngine observation dict into a natural-language string.""" parts = [] # Message if raw_obs.get("message"): parts.append(raw_obs["message"]) # World info world = raw_obs.get("world", {}) if world: parts.append( f"\n-- World: {world.get('world_name', '?')} " f"(type: {world.get('world_type', '?')}, " f"difficulty: {world.get('difficulty', '?')})" ) parts.append(f" Description: {world.get('description', '')}") parts.append(f" Variables: {world.get('variables', [])}") if world.get('causal_mode'): parts.append( f" Causal Mode: This world supports observe AND intervene experiments." ) # Budget if "experiments_remaining" in raw_obs: parts.append( f"\n-- Budget: {raw_obs['experiments_remaining']} experiments remaining " f"(used {raw_obs.get('experiments_used', 0)})" ) # Last experiment result if raw_obs.get("last_experiment_result"): r = raw_obs["last_experiment_result"] parts.append(f"\n-- Last Experiment: inputs={r.get('inputs')}, output={r.get('output')}") if r.get("mode"): parts.append(f" Mode: {r['mode']}") # Hypothesis feedback if raw_obs.get("hypothesis_feedback"): hf = raw_obs["hypothesis_feedback"] parts.append(f"\n-- Hypothesis Feedback: {hf.get('quality', '')}") # Recent experiment history (last 5) hist = raw_obs.get("experiment_history", []) if hist: parts.append(f"\n-- Recent Experiments ({len(hist)} shown):") for i, exp in enumerate(hist[-5:], 1): parts.append(f" {i}. inputs={exp.get('inputs')} -> output={exp.get('output')}") # Test cases tests = raw_obs.get("test_cases", []) if tests: parts.append(f"\n-- Test Cases to Predict ({len(tests)} total):") for i, tc in enumerate(tests[:5], 1): parts.append(f" {i}. {tc}") if len(tests) > 5: parts.append(f" ... and {len(tests) - 5} more") # Final results if raw_obs.get("final_results"): fr = raw_obs["final_results"] rb = fr.get("reward_breakdown", {}) parts.append(f"\n-- FINAL RESULTS --") parts.append(f" Total Reward: {rb.get('total_reward', 0):.1f}/100") parts.append(f" Ground Truth: {fr.get('ground_truth', '?')}") parts.append(f" Passed: {fr.get('passed', False)}") # Action space parts.append(f"\n-- Available Actions --\n{action_desc}") return "\n".join(parts) # --------------------------------------------------------------------------- # OpenEnv Environment # --------------------------------------------------------------------------- class HypothesisEngineOpenEnv( Environment[HypothesisAction, HypothesisObservation, HypothesisState] ): """ OpenEnv-compatible wrapper for Hypothesis Engine. This wraps HypothesisEngine to work with openenv-core 0.2.1, enabling deployment on HuggingFace Spaces and training with TRL/Unsloth. """ SUPPORTS_CONCURRENT_SESSIONS = True def __init__( self, difficulty: int = 1, experiment_budget: int = 30, auto_curriculum: bool = True, use_self_play: bool = False, **kwargs, ): super().__init__(**kwargs) self.difficulty = difficulty self.experiment_budget = experiment_budget self.auto_curriculum = auto_curriculum self.use_self_play = use_self_play self._env: Optional[HypothesisEngine] = None self._last_raw_obs: Dict[str, Any] = {} self._step_count = 0 def _ensure_env(self, seed: Optional[int] = None) -> HypothesisEngine: """Create a new HypothesisEngine instance.""" return HypothesisEngine( difficulty=self.difficulty, experiment_budget=self.experiment_budget, seed=seed, auto_curriculum=self.auto_curriculum, use_self_play=self.use_self_play, ) def reset( self, seed: Optional[int] = None, episode_id: Optional[str] = None, **kwargs, ) -> HypothesisObservation: """Reset the environment and return initial observation.""" self._env = self._ensure_env(seed=seed) self._step_count = 0 raw_obs = self._env.reset(seed=seed) self._last_raw_obs = raw_obs action_desc = self._env.get_action_space_description() text = _format_observation_text(raw_obs, action_desc) world = raw_obs.get("world", {}) obs = HypothesisObservation( text=text, done=False, reward=None, phase=raw_obs.get("phase", "exploration"), world_name=world.get("world_name", ""), world_type=world.get("world_type", ""), experiments_remaining=raw_obs.get("experiments_remaining", 0), experiments_used=raw_obs.get("experiments_used", 0), action_space=action_desc, metadata={"episode_id": episode_id or "", "raw": raw_obs}, ) return self._apply_transform(obs) def step( self, action: HypothesisAction, timeout_s: Optional[float] = None, **kwargs, ) -> HypothesisObservation: """Take a step in the environment.""" if self._env is None: return HypothesisObservation( text="Environment not started. Call reset() first.", done=False, reward=-1.0, phase="not_started", ) # Convert OpenEnv action to HypothesisEngine action dict action_dict: Dict[str, Any] = {"action": action.action} if action.inputs is not None: action_dict["inputs"] = action.inputs if action.mode is not None: action_dict["mode"] = action.mode if action.expression is not None: action_dict["expression"] = action.expression if action.predictions is not None: action_dict["predictions"] = action.predictions raw_obs, reward, done, info = self._env.step(action_dict) self._last_raw_obs = raw_obs self._step_count += 1 action_desc = self._env.get_action_space_description() text = _format_observation_text(raw_obs, action_desc) world = raw_obs.get("world", {}) obs = HypothesisObservation( text=text, done=done, reward=reward, phase=raw_obs.get("phase", ""), world_name=world.get("world_name", ""), world_type=world.get("world_type", ""), experiments_remaining=raw_obs.get("experiments_remaining", 0), experiments_used=raw_obs.get("experiments_used", 0), action_space=action_desc if not done else "", metadata={"info": info, "raw": raw_obs}, ) return self._apply_transform(obs) @property def state(self) -> HypothesisState: """Get the current environment state.""" if self._env is None: return HypothesisState() summary = self._env.get_episode_summary() return HypothesisState( step_count=self._step_count, difficulty=summary.get("difficulty", self.difficulty), world_name=summary.get("world_name", ""), world_type=summary.get("world_type", ""), phase=summary.get("phase", "not_started"), experiments_used=summary.get("experiments_used", 0), experiments_remaining=summary.get("experiments_remaining", 0), hypothesis_count=summary.get("hypotheses_submitted", 0), best_hypothesis_score=summary.get("best_hypothesis_score", 0.0), ) def get_metadata(self): """Return environment metadata.""" from openenv.core.env_server.types import EnvironmentMetadata return EnvironmentMetadata( name="HypothesisEngine", description=( "A procedurally-generated RL environment for training LLMs on " "scientific reasoning through causal discovery, physics simulation, " "state machine reverse-engineering, and adversarial self-play." ), version="2.0.0", author="AbhinavDubey30", documentation_url="https://github.com/AbhinavDubey30/OpenMax", ) def close(self) -> None: """Clean up resources.""" self._env = None # --------------------------------------------------------------------------- # App factory for HuggingFace Spaces / local server # --------------------------------------------------------------------------- def create_hypothesis_app( difficulty: int = 1, experiment_budget: int = 30, auto_curriculum: bool = True, use_self_play: bool = False, max_concurrent_envs: int = 5, ): """ Create a FastAPI app for serving HypothesisEngine on HF Spaces. Usage: # In app.py for HF Spaces: from hypothesis_engine.openenv_wrapper import create_hypothesis_app app = create_hypothesis_app() # Or run locally: # uvicorn hypothesis_engine.openenv_wrapper:app --reload """ def env_factory(): return HypothesisEngineOpenEnv( difficulty=difficulty, experiment_budget=experiment_budget, auto_curriculum=auto_curriculum, use_self_play=use_self_play, ) return create_app( env=env_factory, action_cls=HypothesisAction, observation_cls=HypothesisObservation, env_name="HypothesisEngine", max_concurrent_envs=max_concurrent_envs, ) # Default app instance for uvicorn / HF Spaces app = create_hypothesis_app()