# (removed: "Spaces: Sleeping" page-status residue from HuggingFace Spaces scraping — not part of the module)
"""
OpenEnv Integration for Hypothesis Engine.

Wraps HypothesisEngine as an openenv-core Environment (v0.2.1)
for deployment on HuggingFace Spaces and use with TRL/Unsloth training.

Usage:
    from hypothesis_engine.openenv_wrapper import create_hypothesis_app

    app = create_hypothesis_app()
"""
import json
from typing import Any, Dict, List, Optional

from openenv.core import (
    Action,
    Environment,
    Observation,
    State,
    create_app,
)
from pydantic import Field

from .env import HypothesisEngine
# ---------------------------------------------------------------------------
# Custom types for OpenEnv
# ---------------------------------------------------------------------------
class HypothesisAction(Action):
    """Action for the Hypothesis Engine environment.

    One action type is submitted per step; the optional fields below are only
    consulted for the action types named in their descriptions.
    """

    # Dispatch key — HypothesisEngineOpenEnv.step() forwards it verbatim to the
    # engine's action dict.
    action: str = Field(
        description=(
            "Action type: 'experiment', 'hypothesize', 'predict', "
            "'get_status', or 'get_hint'"
        )
    )
    # Used by the 'experiment' action: variable name -> value to set.
    inputs: Optional[Dict[str, float]] = Field(
        default=None,
        description="Input values for experiment action, e.g. {'x': 3.0}",
    )
    # Used by 'experiment' in causal worlds only.
    mode: Optional[str] = Field(
        default=None,
        description="Experiment mode: 'observe' or 'intervene' (causal worlds only)",
    )
    # Used by the 'hypothesize' action.
    expression: Optional[str] = Field(
        default=None,
        description="Mathematical expression for hypothesize action, e.g. '2*x + 3'",
    )
    # Used by the 'predict' action.
    predictions: Optional[List[float]] = Field(
        default=None,
        description="List of predicted values for predict action",
    )
class HypothesisObservation(Observation):
    """Observation returned by the Hypothesis Engine environment.

    NOTE(review): `done`, `reward`, and `metadata` used by reset()/step() are
    presumably inherited from the openenv `Observation` base — confirm.
    """

    # Primary payload: the full formatted prompt built by
    # _format_observation_text().
    text: str = Field(
        default="",
        description="Natural language observation for LLM agents",
    )
    phase: str = Field(
        default="not_started",
        description="Current episode phase: exploration, prediction, or done",
    )
    world_name: str = Field(default="", description="Name of the current world")
    world_type: str = Field(default="", description="Type of the current world")
    experiments_remaining: int = Field(
        default=0, description="Experiments remaining in budget"
    )
    experiments_used: int = Field(
        default=0, description="Experiments used so far"
    )
    # Duplicated in `text`; exposed separately for programmatic agents.
    # Cleared (set to "") once the episode is done.
    action_space: str = Field(
        default="", description="Description of available actions"
    )
class HypothesisState(State):
    """Internal state of the Hypothesis Engine environment.

    Populated from HypothesisEngine.get_episode_summary() in
    HypothesisEngineOpenEnv.state().  NOTE(review): `step_count`, assigned in
    state(), is presumably a field of the openenv `State` base — confirm.
    """

    difficulty: int = Field(default=1, description="Current difficulty level")
    world_name: str = Field(default="", description="Current world name")
    world_type: str = Field(default="", description="Current world type")
    phase: str = Field(default="not_started", description="Episode phase")
    experiments_used: int = Field(default=0, description="Experiments used")
    experiments_remaining: int = Field(default=0, description="Experiments left")
    hypothesis_count: int = Field(default=0, description="Hypotheses submitted")
    best_hypothesis_score: float = Field(
        default=0.0, description="Best hypothesis score so far"
    )
# ---------------------------------------------------------------------------
# Observation formatter -- turns raw dict into LLM-friendly text
# ---------------------------------------------------------------------------
| def _format_observation_text(raw_obs: Dict[str, Any], action_desc: str) -> str: | |
| """Convert a raw HypothesisEngine observation dict into a natural-language string.""" | |
| parts = [] | |
| # Message | |
| if raw_obs.get("message"): | |
| parts.append(raw_obs["message"]) | |
| # World info | |
| world = raw_obs.get("world", {}) | |
| if world: | |
| parts.append( | |
| f"\n-- World: {world.get('world_name', '?')} " | |
| f"(type: {world.get('world_type', '?')}, " | |
| f"difficulty: {world.get('difficulty', '?')})" | |
| ) | |
| parts.append(f" Description: {world.get('description', '')}") | |
| parts.append(f" Variables: {world.get('variables', [])}") | |
| if world.get('causal_mode'): | |
| parts.append( | |
| f" Causal Mode: This world supports observe AND intervene experiments." | |
| ) | |
| # Budget | |
| if "experiments_remaining" in raw_obs: | |
| parts.append( | |
| f"\n-- Budget: {raw_obs['experiments_remaining']} experiments remaining " | |
| f"(used {raw_obs.get('experiments_used', 0)})" | |
| ) | |
| # Last experiment result | |
| if raw_obs.get("last_experiment_result"): | |
| r = raw_obs["last_experiment_result"] | |
| parts.append(f"\n-- Last Experiment: inputs={r.get('inputs')}, output={r.get('output')}") | |
| if r.get("mode"): | |
| parts.append(f" Mode: {r['mode']}") | |
| # Hypothesis feedback | |
| if raw_obs.get("hypothesis_feedback"): | |
| hf = raw_obs["hypothesis_feedback"] | |
| parts.append(f"\n-- Hypothesis Feedback: {hf.get('quality', '')}") | |
| # Recent experiment history (last 5) | |
| hist = raw_obs.get("experiment_history", []) | |
| if hist: | |
| parts.append(f"\n-- Recent Experiments ({len(hist)} shown):") | |
| for i, exp in enumerate(hist[-5:], 1): | |
| parts.append(f" {i}. inputs={exp.get('inputs')} -> output={exp.get('output')}") | |
| # Test cases | |
| tests = raw_obs.get("test_cases", []) | |
| if tests: | |
| parts.append(f"\n-- Test Cases to Predict ({len(tests)} total):") | |
| for i, tc in enumerate(tests[:5], 1): | |
| parts.append(f" {i}. {tc}") | |
| if len(tests) > 5: | |
| parts.append(f" ... and {len(tests) - 5} more") | |
| # Final results | |
| if raw_obs.get("final_results"): | |
| fr = raw_obs["final_results"] | |
| rb = fr.get("reward_breakdown", {}) | |
| parts.append(f"\n-- FINAL RESULTS --") | |
| parts.append(f" Total Reward: {rb.get('total_reward', 0):.1f}/100") | |
| parts.append(f" Ground Truth: {fr.get('ground_truth', '?')}") | |
| parts.append(f" Passed: {fr.get('passed', False)}") | |
| # Action space | |
| parts.append(f"\n-- Available Actions --\n{action_desc}") | |
| return "\n".join(parts) | |
# ---------------------------------------------------------------------------
# OpenEnv Environment
# ---------------------------------------------------------------------------
class HypothesisEngineOpenEnv(
    Environment[HypothesisAction, HypothesisObservation, HypothesisState]
):
    """
    OpenEnv-compatible wrapper for Hypothesis Engine.

    This wraps HypothesisEngine to work with openenv-core 0.2.1,
    enabling deployment on HuggingFace Spaces and training with TRL/Unsloth.

    Lifecycle: reset() builds a fresh HypothesisEngine per episode; step()
    translates HypothesisAction -> engine action dict, and the engine's raw
    dict observation -> HypothesisObservation (formatted text + metadata).
    """

    # Advertises concurrent-session support to the openenv server.
    # NOTE(review): mutable per-instance state (_env, _last_raw_obs) assumes
    # one instance per session — confirm the server uses the factory per
    # session rather than sharing one instance.
    SUPPORTS_CONCURRENT_SESSIONS = True

    def __init__(
        self,
        difficulty: int = 1,
        experiment_budget: int = 30,
        auto_curriculum: bool = True,
        use_self_play: bool = False,
        **kwargs,
    ):
        """Store engine configuration; the engine itself is created lazily in reset().

        Args:
            difficulty: Difficulty level passed to each new HypothesisEngine.
            experiment_budget: Experiments allowed per episode.
            auto_curriculum: Forwarded to HypothesisEngine.
            use_self_play: Forwarded to HypothesisEngine.
            **kwargs: Forwarded to the openenv Environment base class.
        """
        super().__init__(**kwargs)
        self.difficulty = difficulty
        self.experiment_budget = experiment_budget
        self.auto_curriculum = auto_curriculum
        self.use_self_play = use_self_play
        # None until reset() is called; step() guards against this.
        self._env: Optional[HypothesisEngine] = None
        # Raw dict from the most recent reset()/step(), kept for inspection.
        self._last_raw_obs: Dict[str, Any] = {}
        self._step_count = 0

    def _ensure_env(self, seed: Optional[int] = None) -> HypothesisEngine:
        """Create a new HypothesisEngine instance."""
        return HypothesisEngine(
            difficulty=self.difficulty,
            experiment_budget=self.experiment_budget,
            seed=seed,
            auto_curriculum=self.auto_curriculum,
            use_self_play=self.use_self_play,
        )

    def reset(
        self,
        seed: Optional[int] = None,
        episode_id: Optional[str] = None,
        **kwargs,
    ) -> HypothesisObservation:
        """Reset the environment and return initial observation.

        Builds a brand-new engine (discarding any previous episode), seeds it,
        and converts the engine's raw dict into a HypothesisObservation with
        formatted text.  `reward` is None on reset by convention.
        """
        self._env = self._ensure_env(seed=seed)
        self._step_count = 0
        raw_obs = self._env.reset(seed=seed)
        self._last_raw_obs = raw_obs
        action_desc = self._env.get_action_space_description()
        text = _format_observation_text(raw_obs, action_desc)
        world = raw_obs.get("world", {})
        obs = HypothesisObservation(
            text=text,
            done=False,
            reward=None,
            phase=raw_obs.get("phase", "exploration"),
            world_name=world.get("world_name", ""),
            world_type=world.get("world_type", ""),
            experiments_remaining=raw_obs.get("experiments_remaining", 0),
            experiments_used=raw_obs.get("experiments_used", 0),
            action_space=action_desc,
            # Raw dict exposed for clients that want structured data.
            metadata={"episode_id": episode_id or "", "raw": raw_obs},
        )
        # _apply_transform is presumably provided by the Environment base
        # (observation transforms) — TODO confirm against openenv-core 0.2.1.
        return self._apply_transform(obs)

    def step(
        self,
        action: HypothesisAction,
        timeout_s: Optional[float] = None,
        **kwargs,
    ) -> HypothesisObservation:
        """Take a step in the environment.

        Args:
            action: The typed action; only fields that are not None are
                forwarded to the engine.
            timeout_s: Accepted for interface compatibility; not used here.

        Returns:
            Observation with `reward`/`done` from the engine.  If reset() has
            not been called, returns a sentinel observation with reward -1.0
            rather than raising.
        """
        if self._env is None:
            return HypothesisObservation(
                text="Environment not started. Call reset() first.",
                done=False,
                reward=-1.0,
                phase="not_started",
            )
        # Convert OpenEnv action to HypothesisEngine action dict
        action_dict: Dict[str, Any] = {"action": action.action}
        if action.inputs is not None:
            action_dict["inputs"] = action.inputs
        if action.mode is not None:
            action_dict["mode"] = action.mode
        if action.expression is not None:
            action_dict["expression"] = action.expression
        if action.predictions is not None:
            action_dict["predictions"] = action.predictions
        raw_obs, reward, done, info = self._env.step(action_dict)
        self._last_raw_obs = raw_obs
        self._step_count += 1
        action_desc = self._env.get_action_space_description()
        text = _format_observation_text(raw_obs, action_desc)
        world = raw_obs.get("world", {})
        obs = HypothesisObservation(
            text=text,
            done=done,
            reward=reward,
            phase=raw_obs.get("phase", ""),
            world_name=world.get("world_name", ""),
            world_type=world.get("world_type", ""),
            experiments_remaining=raw_obs.get("experiments_remaining", 0),
            experiments_used=raw_obs.get("experiments_used", 0),
            # No actions remain once the episode is over.
            action_space=action_desc if not done else "",
            metadata={"info": info, "raw": raw_obs},
        )
        return self._apply_transform(obs)

    def state(self) -> HypothesisState:
        """Get the current environment state.

        Returns a default HypothesisState before the first reset(); otherwise
        mirrors the engine's episode summary into typed fields.
        """
        if self._env is None:
            return HypothesisState()
        summary = self._env.get_episode_summary()
        return HypothesisState(
            step_count=self._step_count,
            difficulty=summary.get("difficulty", self.difficulty),
            world_name=summary.get("world_name", ""),
            world_type=summary.get("world_type", ""),
            phase=summary.get("phase", "not_started"),
            experiments_used=summary.get("experiments_used", 0),
            experiments_remaining=summary.get("experiments_remaining", 0),
            hypothesis_count=summary.get("hypotheses_submitted", 0),
            best_hypothesis_score=summary.get("best_hypothesis_score", 0.0),
        )

    def get_metadata(self):
        """Return environment metadata (name, description, version) for the server."""
        # Imported here (not at module top) so the module stays importable if
        # the server-side types submodule is unavailable.
        from openenv.core.env_server.types import EnvironmentMetadata
        return EnvironmentMetadata(
            name="HypothesisEngine",
            description=(
                "A procedurally-generated RL environment for training LLMs on "
                "scientific reasoning through causal discovery, physics simulation, "
                "state machine reverse-engineering, and adversarial self-play."
            ),
            version="2.0.0",
            author="AbhinavDubey30",
            documentation_url="https://github.com/AbhinavDubey30/OpenMax",
        )

    def close(self) -> None:
        """Clean up resources."""
        # The engine holds no external resources; dropping the reference is enough.
        self._env = None
# ---------------------------------------------------------------------------
# App factory for HuggingFace Spaces / local server
# ---------------------------------------------------------------------------
def create_hypothesis_app(
    difficulty: int = 1,
    experiment_budget: int = 30,
    auto_curriculum: bool = True,
    use_self_play: bool = False,
    max_concurrent_envs: int = 5,
):
    """
    Build and return a FastAPI app that serves HypothesisEngine via openenv.

    The engine parameters are captured in a closure, so every session the
    server opens gets its own freshly-configured environment instance.

    Usage:
        # In app.py for HF Spaces:
        from hypothesis_engine.openenv_wrapper import create_hypothesis_app
        app = create_hypothesis_app()
        # Or run locally:
        # uvicorn hypothesis_engine.openenv_wrapper:app --reload
    """

    def _make_env():
        # Per-session environment factory; closes over the configuration above.
        return HypothesisEngineOpenEnv(
            difficulty=difficulty,
            experiment_budget=experiment_budget,
            auto_curriculum=auto_curriculum,
            use_self_play=use_self_play,
        )

    return create_app(
        env=_make_env,
        action_cls=HypothesisAction,
        observation_cls=HypothesisObservation,
        env_name="HypothesisEngine",
        max_concurrent_envs=max_concurrent_envs,
    )
# Default app instance for uvicorn / HF Spaces
# (constructed at import time so `uvicorn hypothesis_engine.openenv_wrapper:app`
# works without a factory flag)
app = create_hypothesis_app()