# File size: 1,591 Bytes
# Commit-like hashes carried over from the extracted view: e6b6793, 98b952a
---
# Manifest identity: package name, version and a short summary.
name: viral-script-debugging-engine
# Kept as a quoted string, the safe form for version-like values.
version: '1.0.0'
# Folded scalar (`>`): the line breaks below load as single spaces.
description: >
  A multi-agent RL environment where an LLM Arbitrator learns to
  improve short-form video scripts through adversarial debate.
  Trains with GRPO via HuggingFace TRL + Unsloth.
  Hits Theme 1 (Multi-Agent) and Theme 4 (Self-Improvement) simultaneously.
# Theme tags; presumably these are the Theme 1 / Theme 4 entries that the
# description refers to — confirm against the consumer's theme list.
themes:
  - multi_agent_interactions
  - self_improvement
# NOTE(review): "Team Name" looks like a placeholder — confirm before release.
author: 'Team Name'
# Quoted because a plain value may not begin with `>` (block-scalar indicator).
python_requires: '>=3.10'
# Environment class to load. NOTE(review): presumably `module.path:ClassName`
# form — confirm against the loader that reads this file.
# Quoted defensively because the value contains a colon.
entry_point: 'viral_script_engine.environment.env:ViralScriptEnv'
# One method name per hook; presumably resolved on the entry-point class —
# verify against the consumer.
reset_method: reset
step_method: step
state_method: state
reward_method: reward
# Tools declared for this environment, one entry per tool name.
# Descriptions use folded scalars with strip chomping (`>-`): each one
# loads as a single-line string with no trailing newline, identical to a
# one-line quoted string, while the source stays within the line-length
# limit.
tools:
  - name: env_reset
    description: >-
      Start a new script improvement episode.
      Accepts: session_id (str), difficulty (str: easy|medium|hard),
      options (dict).
      Returns: observation dict, info dict.
  - name: env_step
    description: >-
      Execute one debate round: Critic attacks, Defender responds,
      Arbitrator acts, Rewriter executes.
      Accepts: session_id (str), action (dict with action_type,
      target_section, instruction, critique_claim_id, reasoning).
      Returns: observation, reward, terminated, truncated, info.
  - name: env_state
    description: >-
      Get the full current environment state.
      Accepts: session_id (str).
      Returns: current_script, original_script, debate_history,
      reward_components, step_num, difficulty_level, episode_id.
  - name: env_health
    description: >-
      Health check endpoint.
      Returns: status, environment name, version.
# Package requirements; entries appear to be pip-style `name>=version`
# specifiers — confirm the format the consumer expects.
dependencies:
  - 'anthropic>=0.40.0'
  - 'sentence-transformers>=2.7.0'
  # NOTE(review): the only entry without a version bound — confirm that
  # leaving it unpinned is intentional.
  - 'unsloth'
  - 'trl>=0.12.0'
  - 'numpy>=1.26.0'
  - 'pydantic>=2.0.0'
  - 'fastapi>=0.110.0'
  - 'uvicorn>=0.29.0'