# MetaDebate / openenv.yaml
# vajeeda's picture
# final mvp created
# 98b952a
# Identity of the environment manifest.
name: viral-script-debugging-engine
# Quoted so the version stays a string and is never re-typed by the parser.
version: "1.0.0"
# Folded block scalar: the indented lines below are joined with single spaces
# into one paragraph. The body must be indented deeper than the key, otherwise
# the scalar is empty and the text is parsed as stray top-level content.
description: >
  A multi-agent RL environment where an LLM Arbitrator learns to improve
  short-form video scripts through adversarial debate. Trains with GRPO via
  HuggingFace TRL + Unsloth. Hits Theme 1 (Multi-Agent) and Theme 4
  (Self-Improvement) simultaneously.
# Themes this environment targets; the description refers to them as
# Theme 1 (Multi-Agent) and Theme 4 (Self-Improvement).
themes:
  - multi_agent_interactions
  - self_improvement
# NOTE(review): "Team Name" looks like a placeholder - confirm before release.
author: 'Team Name'
# Quoted: a value starting with `>` would otherwise open a folded block scalar.
python_requires: '>=3.10'

# Environment class reference, written as `module.path:ClassName`.
# Quoted defensively because the value contains a colon.
entry_point: 'viral_script_engine.environment.env:ViralScriptEnv'

# Presumably the method names looked up on the entry-point class for each
# operation - verify against the consumer of this manifest.
reset_method: reset
step_method: step
state_method: state
reward_method: reward
# Tools exposed by the environment. Each entry is a mapping with a `name` and a
# human-readable `description`; `description` must be nested inside its list
# item, otherwise it is read as a (duplicate) top-level key and the following
# `- name:` entry no longer parses.
# Descriptions use `>-` (folded, no trailing newline) so each long sentence
# stays within the line-length limit while still loading as one single-line
# string.
tools:
  - name: env_reset
    description: >-
      Start a new script improvement episode. Accepts: session_id (str),
      difficulty (str: easy|medium|hard), options (dict). Returns:
      observation dict, info dict.
  - name: env_step
    description: >-
      Execute one debate round: Critic attacks, Defender responds,
      Arbitrator acts, Rewriter executes. Accepts: session_id (str), action
      (dict with action_type, target_section, instruction,
      critique_claim_id, reasoning). Returns: observation, reward,
      terminated, truncated, info.
  - name: env_state
    description: >-
      Get the full current environment state. Accepts: session_id (str).
      Returns: current_script, original_script, debate_history,
      reward_components, step_num, difficulty_level, episode_id.
  - name: env_health
    description: >-
      Health check endpoint. Returns: status, environment name, version.
# Python package requirements, written as pip-style requirement specifiers.
# Quoted so the comparison operators can never be re-interpreted by a parser.
# NOTE(review): `unsloth` carries no version constraint, unlike the others -
# confirm that leaving it unpinned is intentional.
dependencies:
  - 'anthropic>=0.40.0'
  - 'sentence-transformers>=2.7.0'
  - 'unsloth'
  - 'trl>=0.12.0'
  - 'numpy>=1.26.0'
  - 'pydantic>=2.0.0'
  - 'fastapi>=0.110.0'
  - 'uvicorn>=0.29.0'