Spaces:

vajeeda
/

MetaDebate

Sleeping

vajeeda Claude Sonnet 4.6 committed on Apr 25

Commit

41ea373

1 Parent(s): 5fcd0ee

feat(phase1): OpenEnv scaffold + R1/R2 rewards — PHASE 1 GATE PASS

- environment/actions.py: ActionType enum + ArbitratorAction model
- environment/observations.py: RewardComponents (weighted, normalised), DebateRound, Observation
- environment/episode_state.py: mutable episode state dataclass
- environment/env.py: ViralScriptEnv — Gymnasium-compatible reset/step/state, difficulty tiers, anti-gaming wired
- rewards/r1_hook_strength.py: 5-check rule-based hook scorer (promise, curiosity, specificity, front-load, anti-filler)
- rewards/r2_coherence.py: sentence-transformers cosine similarity with 4-range score mapping + embedding cache
- rewards/reward_aggregator.py: catastrophic-drop (>0.2) + action-diversity anti-gaming rules
- agents/rewriter.py: RewriterAgent wrapping LLMBackend with unified diff output
- scripts/run_dummy_episode.py: demo runner with rich output and gate check
- tests/test_rewards.py: 10 tests — R1 (5), R2 (2), aggregator (3)
- tests/test_environment.py: 6 tests — all LLM calls mocked via golden fixtures

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (14) hide show

viral_script_engine/agents/rewriter.py +44 -0
viral_script_engine/environment/__init__.py +0 -0
viral_script_engine/environment/actions.py +17 -0
viral_script_engine/environment/env.py +151 -0
viral_script_engine/environment/episode_state.py +48 -0
viral_script_engine/environment/observations.py +60 -0
viral_script_engine/rewards/__init__.py +0 -0
viral_script_engine/rewards/base.py +7 -0
viral_script_engine/rewards/r1_hook_strength.py +107 -0
viral_script_engine/rewards/r2_coherence.py +48 -0
viral_script_engine/rewards/reward_aggregator.py +42 -0
viral_script_engine/scripts/run_dummy_episode.py +149 -0
viral_script_engine/tests/test_environment.py +107 -0
viral_script_engine/tests/test_rewards.py +117 -0

viral_script_engine/agents/rewriter.py ADDED Viewed

	@@ -0,0 +1,44 @@

+import difflib
+from pydantic import BaseModel
+from viral_script_engine.agents.llm_backend import LLMBackend
+from viral_script_engine.environment.actions import ArbitratorAction
+# System prompt sent with every rewrite call: limits the model to the one requested
+# edit and asks for the bare rewritten script with no commentary.
+_SYSTEM_PROMPT = (
+    "You are a professional script editor for short-form social media video. "
+    "Apply ONLY the instruction given. Do not make any other changes. "
+    "Do not add new ideas. Do not change the creator's voice or regional language patterns. "
+    "Return ONLY the rewritten script text, no commentary."
+)
+class RewriteResult(BaseModel):
+    """Outcome of a single rewrite call."""
+    rewritten_script: str  # script text returned by the LLM backend
+    diff: str  # unified diff from the input script to rewritten_script
+    word_count_delta: int  # whitespace-split word count: rewritten minus input
+class RewriterAgent:
+    def __init__(self, backend: str = "groq", model_name: str = "llama-3.3-70b-versatile"):
+        self.llm = LLMBackend(backend=backend, model_name=model_name)
+    def rewrite(self, current_script: str, action: ArbitratorAction) -> RewriteResult:
+        user_prompt = (
+            f"CURRENT SCRIPT:\n{current_script}\n\n"
+            f"ACTION TYPE: {action.action_type.value}\n"
+            f"TARGET SECTION: {action.target_section}\n"
+            f"INSTRUCTION: {action.instruction}\n\n"
+            "Apply the instruction and return ONLY the rewritten script."
+        )
+        rewritten = self.llm.generate(_SYSTEM_PROMPT, user_prompt, max_tokens=2048)
+        diff_lines = list(difflib.unified_diff(
+            current_script.splitlines(keepends=True),
+            rewritten.splitlines(keepends=True),
+            fromfile="original",
+            tofile="rewritten",
+        ))
+        return RewriteResult(
+            rewritten_script=rewritten,
+            diff="".join(diff_lines),
+            word_count_delta=len(rewritten.split()) - len(current_script.split()),
+        )

viral_script_engine/environment/__init__.py ADDED Viewed

File without changes

viral_script_engine/environment/actions.py ADDED Viewed

	@@ -0,0 +1,17 @@

+from enum import Enum
+from pydantic import BaseModel
+class ActionType(str, Enum):
+    """Kinds of edit an ArbitratorAction can request; each value is the serialized string form."""
+    HOOK_REWRITE = "hook_rewrite"
+    SECTION_REORDER = "section_reorder"
+    CULTURAL_REF_SUB = "cultural_ref_sub"
+    CTA_PLACEMENT = "cta_placement"
+class ArbitratorAction(BaseModel):
+    """One arbitrator decision: which kind of edit to make, where, and why."""
+    action_type: ActionType
+    target_section: str       # "hook" | "body" | "cta" | "full"
+    instruction: str          # natural language instruction to the Rewriter
+    critique_claim_id: str    # which CritiqueClaim this responds to, e.g. "C2"
+    reasoning: str            # why this action was chosen

viral_script_engine/environment/env.py ADDED Viewed

	@@ -0,0 +1,151 @@

+import json
+import random
+from typing import Optional, Tuple
+from viral_script_engine.agents.critic import CriticAgent
+from viral_script_engine.agents.rewriter import RewriterAgent
+from viral_script_engine.environment.actions import ArbitratorAction
+from viral_script_engine.environment.episode_state import EpisodeState
+from viral_script_engine.environment.observations import (
+    DebateRound, Observation, RewardComponents,
+)
+from viral_script_engine.rewards.r1_hook_strength import HookStrengthReward
+from viral_script_engine.rewards.r2_coherence import CoherenceReward
+from viral_script_engine.rewards.reward_aggregator import RewardAggregator
+_TIERS = {
+    "easy": ["S01", "S02", "S03", "S04"],
+    "medium": ["S05", "S06", "S07"],
+    "hard": ["S08", "S09", "S10"],
+    "self_generated": [],
+}
+class ViralScriptEnv:
+    def __init__(
+        self,
+        scripts_path: str = "data/test_scripts/scripts.json",
+        max_steps: int = 5,
+        difficulty: str = "easy",
+        use_anti_gaming: bool = True,
+    ):
+        self.max_steps = max_steps
+        self.difficulty = difficulty
+        self.use_anti_gaming = use_anti_gaming
+        with open(scripts_path) as f:
+            all_scripts = json.load(f)
+        tier_ids = _TIERS[difficulty]
+        self._scripts = [s for s in all_scripts if s["script_id"] in tier_ids]
+        self.critic = CriticAgent()
+        self.rewriter = RewriterAgent()
+        self.r1 = HookStrengthReward()
+        self.r2 = CoherenceReward()
+        self.aggregator = RewardAggregator()
+        self._state: Optional[EpisodeState] = None
+    def reset(self, seed=None, options=None) -> Tuple[dict, dict]:
+        if seed is not None:
+            random.seed(seed)
+        script = random.choice(self._scripts)
+        r1_result = self.r1.score(script["script_text"])
+        r2_result = self.r2.score(script["script_text"], script["script_text"])
+        initial_rewards = RewardComponents(
+            r1_hook_strength=r1_result.score,
+            r2_coherence=r2_result.score,
+        )
+        initial_rewards.compute_total()
+        self._state = EpisodeState.new(
+            script=script,
+            max_steps=self.max_steps,
+            difficulty_level=self.difficulty,
+            initial_rewards=initial_rewards,
+        )
+        return self._build_observation().model_dump(), {}
+    def step(self, action: dict) -> Tuple[dict, float, bool, bool, dict]:
+        if self._state is None:
+            raise RuntimeError("Call reset() before step()")
+        arb_action = ArbitratorAction(**action)
+        critique = self.critic.critique(
+            script=self._state.current_script,
+            region=self._state.region,
+            platform=self._state.platform,
+            niche=self._state.niche,
+        )
+        rewrite_result = self.rewriter.rewrite(self._state.current_script, arb_action)
+        new_script = rewrite_result.rewritten_script
+        r1_result = self.r1.score(new_script)
+        r2_result = self.r2.score(self._state.original_script, new_script)
+        components = RewardComponents(
+            r1_hook_strength=r1_result.score,
+            r2_coherence=r2_result.score,
+        )
+        self._state.action_history.append(arb_action.action_type)
+        if self.use_anti_gaming:
+            components = self.aggregator.compute(
+                components, self._state.episode_start_rewards, self._state.action_history
+            )
+        else:
+            components.compute_total()
+        round_ = DebateRound(
+            step_num=self._state.step_num,
+            critic_claims=critique.claims,
+            arbitrator_action=arb_action,
+            rewrite_diff=rewrite_result.diff,
+            reward_components=components,
+        )
+        self._state.debate_history.append(round_)
+        self._state.current_script = new_script
+        self._state.last_reward_components = components
+        self._state.step_num += 1
+        terminated = (
+            self._state.step_num >= self._state.max_steps
+            or components.total >= 0.9
+        )
+        info = {
+            "reward_components": components.model_dump(),
+            "anti_gaming_triggered": components.anti_gaming_penalty > 0,
+            "penalty_reason": "anti_gaming" if components.anti_gaming_penalty > 0 else None,
+        }
+        return self._build_observation().model_dump(), components.total, terminated, False, info
+    def state(self) -> dict:
+        if self._state is None:
+            return {}
+        s = self._state
+        return {
+            "current_script": s.current_script,
+            "original_script": s.original_script,
+            "debate_history": [r.model_dump() for r in s.debate_history],
+            "reward_components": s.last_reward_components.model_dump(),
+            "step_num": s.step_num,
+            "difficulty_level": s.difficulty_level,
+            "episode_id": s.episode_id,
+        }
+    def _build_observation(self) -> Observation:
+        s = self._state
+        return Observation(
+            current_script=s.current_script,
+            original_script=s.original_script,
+            region=s.region,
+            platform=s.platform,
+            niche=s.niche,
+            step_num=s.step_num,
+            max_steps=s.max_steps,
+            debate_history=s.debate_history,
+            reward_components=s.last_reward_components,
+            difficulty_level=s.difficulty_level,
+            episode_id=s.episode_id,
+        )

viral_script_engine/environment/episode_state.py ADDED Viewed

	@@ -0,0 +1,48 @@

+from __future__ import annotations
+import uuid
+from dataclasses import dataclass, field
+from typing import List
+from viral_script_engine.environment.actions import ActionType
+from viral_script_engine.environment.observations import DebateRound, RewardComponents
+@dataclass
+class EpisodeState:
+    episode_id: str
+    original_script: str
+    current_script: str
+    region: str
+    platform: str
+    niche: str
+    step_num: int
+    max_steps: int
+    debate_history: List[DebateRound]
+    episode_start_rewards: RewardComponents
+    last_reward_components: RewardComponents
+    difficulty_level: str
+    action_history: List[ActionType]
+    @classmethod
+    def new(
+        cls,
+        script: dict,
+        max_steps: int,
+        difficulty_level: str,
+        initial_rewards: RewardComponents,
+    ) -> EpisodeState:
+        return cls(
+            episode_id=str(uuid.uuid4()),
+            original_script=script["script_text"],
+            current_script=script["script_text"],
+            region=script["region"],
+            platform=script["platform"],
+            niche=script["niche"],
+            step_num=0,
+            max_steps=max_steps,
+            debate_history=[],
+            episode_start_rewards=initial_rewards,
+            last_reward_components=initial_rewards,
+            difficulty_level=difficulty_level,
+            action_history=[],
+        )

viral_script_engine/environment/observations.py ADDED Viewed

	@@ -0,0 +1,60 @@

+from __future__ import annotations
+from typing import Any, Dict, List, Optional
+from pydantic import BaseModel
+from viral_script_engine.agents.critic import CritiqueClaim
+from viral_script_engine.environment.actions import ArbitratorAction
+_WEIGHTS: Dict[str, float] = {
+    "r1": 0.25, "r2": 0.20, "r3": 0.20, "r4": 0.20, "r5": 0.15
+}
+class RewardComponents(BaseModel):
+    r1_hook_strength: Optional[float] = None
+    r2_coherence: Optional[float] = None
+    r3_cultural_alignment: Optional[float] = None
+    r4_debate_resolution: Optional[float] = None
+    r5_defender_preservation: Optional[float] = None
+    anti_gaming_penalty: float = 0.0
+    total: float = 0.0
+    def compute_total(self) -> float:
+        vals = {
+            "r1": self.r1_hook_strength,
+            "r2": self.r2_coherence,
+            "r3": self.r3_cultural_alignment,
+            "r4": self.r4_debate_resolution,
+            "r5": self.r5_defender_preservation,
+        }
+        active = {k: v for k, v in vals.items() if v is not None}
+        if not active:
+            self.total = 0.0
+            return 0.0
+        norm = sum(_WEIGHTS[k] for k in active)
+        weighted = sum(_WEIGHTS[k] * v for k, v in active.items()) / norm
+        self.total = max(0.0, min(1.0, weighted - self.anti_gaming_penalty))
+        return self.total
+class DebateRound(BaseModel):
+    """Record of one environment step: critique, chosen action, rewrite diff and rewards."""
+    step_num: int
+    critic_claims: List[CritiqueClaim]
+    defender_response: Optional[Any] = None  # not populated by ViralScriptEnv.step
+    arbitrator_action: Optional[ArbitratorAction] = None
+    rewrite_diff: Optional[str] = None
+    reward_components: Optional[RewardComponents] = None
+class Observation(BaseModel):
+    """Snapshot of the episode returned to the caller by reset() and step()."""
+    current_script: str
+    original_script: str
+    region: str
+    platform: str
+    niche: str
+    step_num: int
+    max_steps: int
+    debate_history: List[DebateRound]
+    reward_components: RewardComponents  # rewards from the most recent scoring
+    difficulty_level: str
+    episode_id: str

viral_script_engine/rewards/__init__.py ADDED Viewed

File without changes

viral_script_engine/rewards/base.py ADDED Viewed

	@@ -0,0 +1,7 @@

+from abc import ABC, abstractmethod
+class BaseReward(ABC):
+    """Abstract base for reward scorers; subclasses must implement score()."""
+    @abstractmethod
+    def score(self, *args, **kwargs):
+        """Compute the reward; arguments and result type are defined by each subclass."""
+        pass

viral_script_engine/rewards/r1_hook_strength.py ADDED Viewed

	@@ -0,0 +1,107 @@

+import re
+from dataclasses import dataclass, field
+from typing import Dict
+from viral_script_engine.rewards.base import BaseReward
+# Lower-case opening phrases; a hook that starts with one fails the anti-filler check.
+_DEAD_OPENERS = [
+    "hey guys", "welcome back", "today i want to", "so today",
+    "in this video", "what's up everyone", "hey everyone",
+    "guys today", "hello everyone", "so basically",
+]
+# Lower-case words that do not count as proper nouns in the specificity check,
+# even when they appear capitalised.
+_COMMON_WORDS = {
+    'i', 'the', 'a', 'an', 'my', 'your', 'its', 'it', 'is', 'are',
+    'was', 'were', 'be', 'been', "i've", "i'm", "it's", "here's",
+    'today', 'and', 'but', 'so', 'that', 'this', 'these', 'those',
+}
+@dataclass
+class HookRewardResult:
+    """Result of scoring a hook with HookStrengthReward."""
+    score: float  # fraction of the five checks that passed
+    checks_passed: int  # number of checks that passed
+    check_details: Dict[str, bool] = field(default_factory=dict)  # check name -> passed
+def _extract_hook(text: str) -> str:
+    sentences = re.split(r'(?<=[.!?])\s+', text.strip())
+    hook = " ".join(sentences[:3]) if len(sentences) >= 3 else text
+    words = hook.split()
+    return " ".join(words[:50]) if len(words) > 50 else hook
+class HookStrengthReward(BaseReward):
+    def score(self, script: str) -> HookRewardResult:
+        hook = _extract_hook(script)
+        hook_lower = hook.lower()
+        first_sentence = re.split(r'(?<=[.!?])\s+', hook.strip())[0].lower()
+        checks = {
+            "promise": self._check_promise(hook_lower),
+            "curiosity": self._check_curiosity(hook_lower),
+            "specificity": self._check_specificity(hook),
+            "front_load": self._check_front_load(first_sentence),
+            "anti_filler": self._check_anti_filler(hook_lower),
+        }
+        passed = sum(checks.values())
+        return HookRewardResult(
+            score=min(1.0, max(0.0, passed / 5)),
+            checks_passed=passed,
+            check_details=checks,
+        )
+    def _check_promise(self, hook: str) -> bool:
+        bad = ["hey guys", "welcome back", "today we're talking about"]
+        if any(b in hook for b in bad):
+            return False
+        patterns = [
+            r'\d',
+            r'\bhow to\b',
+            r'\bwhy\b',
+            r'\bwhat happens when\b',
+            r'\bi made\b',
+        ]
+        return any(re.search(p, hook) for p in patterns)
+    def _check_curiosity(self, hook: str) -> bool:
+        patterns = [
+            r'\?',
+            r"but here'?s the thing",
+            r"most \w+ don'?t know",
+            r"the secret is",
+            r"nobody tells you",
+            r"most people don'?t",
+        ]
+        if not any(re.search(p, hook) for p in patterns):
+            return False
+        first = re.split(r'(?<=[.!?])\s+', hook)[0]
+        if re.search(r'\?', first) and re.search(r'\b(is|are|was|were|means|equals)\b', first):
+            return False
+        return True
+    def _check_specificity(self, hook: str) -> bool:
+        if re.search(r'\d', hook):
+            return True
+        sentences = re.split(r'(?<=[.!?])\s+', hook)
+        for sentence in sentences:
+            words = sentence.split()[1:]
+            for w in words:
+                clean = w.strip('.,!?;:\'"')
+                if clean and clean[0].isupper() and clean.lower() not in _COMMON_WORDS:
+                    return True
+        return False
+    def _check_front_load(self, first_sentence: str) -> bool:
+        signals = 0
+        if re.search(r'\d', first_sentence):
+            signals += 1
+        promise_patterns = [r'\bhow to\b', r'\bwhy\b', r'\bwhat happens when\b', r'\bi made\b']
+        if any(re.search(p, first_sentence) for p in promise_patterns):
+            signals += 1
+        if re.search(r'\?', first_sentence):
+            signals += 1
+        return signals >= 2
+    def _check_anti_filler(self, hook: str) -> bool:
+        return not any(hook.startswith(opener) for opener in _DEAD_OPENERS)

viral_script_engine/rewards/r2_coherence.py ADDED Viewed

	@@ -0,0 +1,48 @@

+import hashlib
+from dataclasses import dataclass
+from viral_script_engine.rewards.base import BaseReward
+@dataclass
+class CoherenceRewardResult:
+    """Result of scoring a rewrite against the original script."""
+    score: float  # mapped reward in [0, 1]
+    raw_similarity: float  # cosine similarity of the two embeddings
+    interpretation: str  # label of the similarity range the pair fell into
+class CoherenceReward(BaseReward):
+    _cache: dict = {}
+    def __init__(self):
+        self._model = None
+    def _get_model(self):
+        if self._model is None:
+            from sentence_transformers import SentenceTransformer
+            self._model = SentenceTransformer("all-MiniLM-L6-v2")
+        return self._model
+    def _embed(self, text: str):
+        key = hashlib.sha256(text.encode()).hexdigest()
+        if key not in self._cache:
+            self._cache[key] = self._get_model().encode(text, convert_to_tensor=True)
+        return self._cache[key]
+    def _cosine_sim(self, a, b) -> float:
+        from sentence_transformers.util import cos_sim
+        return float(cos_sim(a, b)[0][0])
+    def score(self, original: str, rewritten: str) -> CoherenceRewardResult:
+        sim = self._cosine_sim(self._embed(original), self._embed(rewritten))
+        if sim > 0.95:
+            score, interpretation = 0.8, "barely_changed"
+        elif sim >= 0.80:
+            score = 0.5 + (sim - 0.80) / 0.15 * 0.5
+            interpretation = "good_coherence"
+        elif sim >= 0.65:
+            score = (sim - 0.65) / 0.15 * 0.5
+            interpretation = "moderate_drift"
+        else:
+            score, interpretation = 0.0, "drifted_too_far"
+        return CoherenceRewardResult(score=score, raw_similarity=sim, interpretation=interpretation)

viral_script_engine/rewards/reward_aggregator.py ADDED Viewed

	@@ -0,0 +1,42 @@

+import logging
+from typing import List
+from viral_script_engine.environment.actions import ActionType
+from viral_script_engine.environment.observations import RewardComponents
+logger = logging.getLogger(__name__)
+_COMPONENT_FIELDS = [
+    "r1_hook_strength", "r2_coherence", "r3_cultural_alignment",
+    "r4_debate_resolution", "r5_defender_preservation",
+]
+class RewardAggregator:
+    def compute(
+        self,
+        components: RewardComponents,
+        episode_start_components: RewardComponents,
+        action_history: List[ActionType],
+    ) -> RewardComponents:
+        components.compute_total()
+        # Anti-gaming rule 1: catastrophic drop (>0.2 drop in any component)
+        for field in _COMPONENT_FIELDS:
+            curr = getattr(components, field)
+            start = getattr(episode_start_components, field)
+            if curr is not None and start is not None and curr < start - 0.2:
+                logger.warning("Catastrophic drop in %s: %.3f -> %.3f", field, start, curr)
+                components.total = 0.0
+                components.anti_gaming_penalty = start - curr
+                return components
+        # Anti-gaming rule 2: action diversity (last 3 same ActionType)
+        penalty = 0.0
+        if len(action_history) >= 3 and len(set(action_history[-3:])) == 1:
+            penalty = 0.15
+            logger.warning("Action diversity penalty: last 3 actions all %s", action_history[-1])
+        components.anti_gaming_penalty = penalty
+        components.total = max(0.0, min(1.0, components.total - penalty))
+        return components

viral_script_engine/scripts/run_dummy_episode.py ADDED Viewed

	@@ -0,0 +1,149 @@

+#!/usr/bin/env python3
+import argparse
+import json
+import random
+import sys
+from pathlib import Path
+from dotenv import load_dotenv
+from rich.console import Console
+from rich.panel import Panel
+from rich.table import Table
+from rich import box
+load_dotenv()
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+from viral_script_engine.environment.actions import ActionType
+from viral_script_engine.environment.env import ViralScriptEnv
+console = Console()
+BASE_DIR = Path(__file__).parent.parent
+def build_random_action(action_type: ActionType) -> dict:
+    labels = {
+        ActionType.HOOK_REWRITE: ("hook", "Rewrite the hook to open with a specific number or bold claim."),
+        ActionType.SECTION_REORDER: ("body", "Move the strongest point to immediately follow the hook."),
+        ActionType.CULTURAL_REF_SUB: ("full", "Replace any generic references with locally relevant ones."),
+        ActionType.CTA_PLACEMENT: ("cta", "Move the call-to-action earlier, before the 80% mark."),
+    }
+    section, instruction = labels[action_type]
+    return {
+        "action_type": action_type.value,
+        "target_section": section,
+        "instruction": instruction,
+        "critique_claim_id": "C1",
+        "reasoning": f"Demo run: applying {action_type.value}",
+    }
+def run_episode(difficulty: str, steps: int, verbose: bool) -> dict:
+    scripts_path = str(BASE_DIR / "data" / "test_scripts" / "scripts.json")
+    env = ViralScriptEnv(scripts_path=scripts_path, max_steps=steps, difficulty=difficulty)
+    obs, _ = env.reset()
+    console.print(Panel(
+        f"[bold]Episode started[/bold]\n"
+        f"Difficulty: {difficulty}  |  Max steps: {steps}\n"
+        f"Region: {obs['region']}  |  Platform: {obs['platform']}  |  Niche: {obs['niche']}\n"
+        f"Episode ID: {obs['episode_id']}",
+        title="[bold blue]Phase 1 Demo Episode[/bold blue]",
+        border_style="blue",
+    ))
+    episode_log = {
+        "episode_id": obs["episode_id"],
+        "difficulty": difficulty,
+        "steps": [],
+        "final_state": None,
+    }
+    for step_num in range(steps):
+        action_type = random.choice(list(ActionType))
+        action = build_random_action(action_type)
+        obs, reward, terminated, truncated, info = env.step(action)
+        rc = info["reward_components"]
+        if verbose:
+            t = Table(title=f"Step {step_num + 1} — {action_type.value}", box=box.SIMPLE_HEAD)
+            t.add_column("Metric", style="cyan", min_width=22)
+            t.add_column("Value", min_width=12)
+            r1_val = rc.get("r1_hook_strength")
+            r2_val = rc.get("r2_coherence")
+            t.add_row("R1 Hook Strength", f"{r1_val:.3f}" if r1_val is not None else "N/A")
+            t.add_row("R2 Coherence", f"{r2_val:.3f}" if r2_val is not None else "N/A")
+            t.add_row("Total Reward", f"[bold]{reward:.3f}[/bold]")
+            if info.get("anti_gaming_triggered"):
+                t.add_row("Anti-Gaming Penalty", f"[red]{rc.get('anti_gaming_penalty', 0):.3f}[/red]")
+                t.add_row("Penalty Reason", f"[red]{info.get('penalty_reason', '')}[/red]")
+            t.add_row("Terminated", str(terminated))
+            console.print(t)
+            if obs.get("debate_history"):
+                latest = obs["debate_history"][-1]
+                if latest.get("rewrite_diff"):
+                    console.print(Panel(
+                        latest["rewrite_diff"][:600] or "(no diff)",
+                        title="Script Diff",
+                        border_style="yellow",
+                    ))
+        episode_log["steps"].append({
+            "step": step_num + 1,
+            "action": action,
+            "reward": reward,
+            "reward_components": rc,
+            "anti_gaming": info.get("anti_gaming_triggered", False),
+            "terminated": terminated,
+        })
+        if terminated:
+            break
+    final_state = env.state()
+    episode_log["final_state"] = final_state
+    final_rc = final_state["reward_components"]
+    console.print(Panel(
+        f"[bold green]Final Reward:[/bold green] {final_rc.get('total', 0):.3f}\n"
+        f"R1 Hook Strength: {final_rc.get('r1_hook_strength', 'N/A')}\n"
+        f"R2 Coherence: {final_rc.get('r2_coherence', 'N/A')}\n"
+        f"Steps completed: {final_state['step_num']}",
+        title="Episode Summary",
+        border_style="green",
+    ))
+    return episode_log
+def main():
+    parser = argparse.ArgumentParser(description="Run Phase 1 dummy episode")
+    parser.add_argument("--difficulty", default="easy", choices=["easy", "medium", "hard"])
+    parser.add_argument("--steps", type=int, default=3)
+    parser.add_argument("--verbose", action="store_true")
+    args = parser.parse_args()
+    episode_log = run_episode(args.difficulty, args.steps, args.verbose)
+    logs_dir = BASE_DIR / "logs"
+    logs_dir.mkdir(exist_ok=True)
+    log_path = logs_dir / f"episode_{episode_log['episode_id']}.json"
+    with open(log_path, "w") as f:
+        json.dump(episode_log, f, indent=2, default=str)
+    console.print(f"[dim]Episode log saved -> {log_path}[/dim]")
+    final_rc = episode_log["final_state"]["reward_components"]
+    gate_pass = (
+        final_rc.get("r1_hook_strength") is not None
+        and final_rc.get("r2_coherence") is not None
+        and log_path.exists()
+    )
+    style = "bold green" if gate_pass else "bold red"
+    label = f"PHASE 1 GATE: {'PASS' if gate_pass else 'FAIL'}"
+    console.print(Panel(f"[{style}]{label}[/{style}]", border_style="green" if gate_pass else "red"))
+if __name__ == "__main__":
+    main()

viral_script_engine/tests/test_environment.py ADDED Viewed

	@@ -0,0 +1,107 @@

+import json
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+import pytest
+from viral_script_engine.agents.critic import CritiqueOutput, CritiqueClaim
+from viral_script_engine.agents.rewriter import RewriteResult
+from viral_script_engine.environment.actions import ActionType, ArbitratorAction
+# Paths to the golden fixtures and test scripts, resolved relative to the package
+# directory one level above this tests directory.
+FIXTURE_DIR = Path(__file__).parent.parent / "data" / "golden_fixtures"
+SCRIPTS_PATH = str(Path(__file__).parent.parent / "data" / "test_scripts" / "scripts.json")
+def load_fixture(script_id: str) -> dict:
+    with open(FIXTURE_DIR / f"fixture_{script_id}.json") as f:
+        return json.load(f)
+def make_mock_critique() -> CritiqueOutput:
+    fixture = load_fixture("S01")
+    claims = [CritiqueClaim(**c) for c in fixture["critique"]["claims"]]
+    return CritiqueOutput(
+        claims=claims,
+        overall_severity=fixture["critique"]["overall_severity"],
+        raw_response=fixture["critique"]["raw_response"],
+    )
+def make_mock_rewrite(current_script: str, action: ArbitratorAction) -> RewriteResult:
+    """Stand-in for RewriterAgent.rewrite: append " [REWRITTEN]" without calling an LLM.
+    `action` is accepted so the signature matches the call made by the environment; it is unused.
+    """
+    return RewriteResult(
+        rewritten_script=current_script + " [REWRITTEN]",
+        diff="@@ diff @@",
+        word_count_delta=1,
+    )
+# Action dict passed to env.step() in the tests; always a hook rewrite.
+SAMPLE_ACTION = {
+    "action_type": ActionType.HOOK_REWRITE.value,
+    "target_section": "hook",
+    "instruction": "Make the hook more attention-grabbing with a specific number.",
+    "critique_claim_id": "C1",
+    "reasoning": "Hook is weak per C1",
+}
+@pytest.fixture
+def env():
+    """Yield a ViralScriptEnv whose critic and rewriter are mocks, so no LLM is called."""
+    # Patch the names as seen from the env module; the environment is constructed
+    # while both patches are active, so its __init__ receives the mocks.
+    with (
+        patch("viral_script_engine.environment.env.CriticAgent") as mock_critic_cls,
+        patch("viral_script_engine.environment.env.RewriterAgent") as mock_rewriter_cls,
+    ):
+        mock_critic = MagicMock()
+        mock_critic.critique.return_value = make_mock_critique()
+        mock_critic_cls.return_value = mock_critic
+        mock_rewriter = MagicMock()
+        # side_effect so each call derives its result from the script it was given
+        mock_rewriter.rewrite.side_effect = make_mock_rewrite
+        mock_rewriter_cls.return_value = mock_rewriter
+        from viral_script_engine.environment.env import ViralScriptEnv
+        yield ViralScriptEnv(scripts_path=SCRIPTS_PATH, max_steps=5, difficulty="easy")
+def test_reset_returns_valid_observation(env):
+    """reset() yields a step-0 observation that already carries R1 and R2 scores."""
+    obs, info = env.reset(seed=42)
+    assert "current_script" in obs
+    assert obs["step_num"] == 0
+    assert obs["max_steps"] == 5
+    assert obs["reward_components"]["r1_hook_strength"] is not None
+    assert obs["reward_components"]["r2_coherence"] is not None
+def test_step_completes_without_error(env):
+    """step() returns a float reward and reports the reward components in info."""
+    env.reset(seed=42)
+    obs, reward, terminated, truncated, info = env.step(SAMPLE_ACTION)
+    assert isinstance(reward, float)
+    assert "reward_components" in info
+def test_step_increments_step_num(env):
+    """Each step() advances the observation's step_num by one."""
+    env.reset(seed=42)
+    obs, *_ = env.step(SAMPLE_ACTION)
+    assert obs["step_num"] == 1
+    obs, *_ = env.step(SAMPLE_ACTION)
+    assert obs["step_num"] == 2
+def test_anti_gaming_penalty_fires_on_repeated_action(env):
+    """Three identical actions in a row trigger the anti-gaming penalty."""
+    env.reset(seed=42)
+    for _ in range(3):
+        obs, reward, _, _, info = env.step(SAMPLE_ACTION)
+    # only the info from the third step is checked
+    assert info["anti_gaming_triggered"]
+def test_episode_terminates_at_max_steps(env):
+    """After max_steps (5) steps the episode reports terminated."""
+    env.reset(seed=42)
+    terminated = False
+    for _ in range(5):
+        obs, reward, terminated, truncated, info = env.step(SAMPLE_ACTION)
+    assert terminated
+def test_reward_clipped_to_0_1(env):
+    """The step reward stays within [0, 1]."""
+    env.reset(seed=42)
+    _, reward, _, _, _ = env.step(SAMPLE_ACTION)
+    assert 0.0 <= reward <= 1.0

viral_script_engine/tests/test_rewards.py ADDED Viewed

	@@ -0,0 +1,117 @@

+import pytest
+from viral_script_engine.rewards.r1_hook_strength import HookStrengthReward
+from viral_script_engine.rewards.r2_coherence import CoherenceReward
+from viral_script_engine.rewards.reward_aggregator import RewardAggregator
+from viral_script_engine.environment.observations import RewardComponents
+from viral_script_engine.environment.actions import ActionType
+# ── R1 test hooks ─────────────────────────────────────────────────────────────
+# Hooks the R1 tests expect to score above 0.8.
+HOOK_HIGH_1 = (
+    "I made $10,000 in 30 days with 3 crypto strategies. "
+    "Here's the secret most people don't know. "
+    "This completely changed how I invest."
+)
+HOOK_HIGH_2 = (
+    "Why 95% of people fail at losing weight in 2024. "
+    "Most people don't know this simple truth. "
+    "It's not about calories at all."
+)
+# Hooks the R1 tests expect to score below 0.3; both start with a dead opener.
+HOOK_LOW_1 = (
+    "Hey guys, welcome back to my channel! "
+    "Today I want to talk about some stuff. "
+    "It's going to be super interesting!"
+)
+HOOK_LOW_2 = (
+    "So basically today I'm going to talk about fitness. "
+    "It's really important for everyone. "
+    "Let's get started with some tips."
+)
+# Hook the R1 tests expect to land in the middle band, 0.3 to 0.7.
+HOOK_EDGE = (
+    "What nobody tells you about starting a business in India. "
+    "I found out the hard way. "
+    "Here's my experience."
+)
+@pytest.fixture
+def r1():
+    """Fresh hook-strength scorer."""
+    return HookStrengthReward()
+@pytest.fixture
+def r2():
+    """Fresh coherence scorer."""
+    return CoherenceReward()
+@pytest.fixture
+def aggregator():
+    """Fresh reward aggregator."""
+    return RewardAggregator()
+# ── R1 tests ──────────────────────────────────────────────────────────────────
+def test_r1_high_score_1(r1):
+    """A strong hook scores above 0.8."""
+    result = r1.score(HOOK_HIGH_1)
+    assert result.score > 0.8
+def test_r1_high_score_2(r1):
+    """A second strong hook scores above 0.8."""
+    result = r1.score(HOOK_HIGH_2)
+    assert result.score > 0.8
+def test_r1_low_score_1(r1):
+    """A greeting-style opener scores below 0.3."""
+    result = r1.score(HOOK_LOW_1)
+    assert result.score < 0.3
+def test_r1_low_score_2(r1):
+    """A filler opener scores below 0.3."""
+    result = r1.score(HOOK_LOW_2)
+    assert result.score < 0.3
+def test_r1_edge_case(r1):
+    """A partly effective hook lands between 0.3 and 0.7 inclusive."""
+    result = r1.score(HOOK_EDGE)
+    assert 0.3 <= result.score <= 0.7
+# ── R2 tests ──────────────────────────────────────────────────────────────────
+def test_r2_identical_strings(r2):
+    """Identical texts fall in the "barely changed" range and score exactly 0.8."""
+    text = "This is a test script for the viral script engine."
+    result = r2.score(text, text)
+    assert result.score == 0.8
+def test_r2_different_strings(r2):
+    """Unrelated texts are expected to score exactly 0.0."""
+    orig = "I made $10,000 with crypto in 30 days using these 3 strategies."
+    diff = "The history of ancient Rome spans over a thousand years of conquest."
+    result = r2.score(orig, diff)
+    assert result.score == 0.0
+# ── Aggregator tests ──────────────────────────────────────────────────────────
+def test_aggregator_catastrophic_drop(aggregator):
+    """A drop of 0.5 in R1 relative to the episode start zeroes the total."""
+    start = RewardComponents(r1_hook_strength=0.8, r2_coherence=0.7)
+    start.compute_total()
+    current = RewardComponents(r1_hook_strength=0.3, r2_coherence=0.7)
+    result = aggregator.compute(current, start, [ActionType.HOOK_REWRITE])
+    assert result.total == 0.0
+def test_aggregator_diversity_penalty(aggregator):
+    """Three identical actions in a row cost a 0.15 penalty."""
+    start = RewardComponents(r1_hook_strength=0.6, r2_coherence=0.6)
+    start.compute_total()
+    current = RewardComponents(r1_hook_strength=0.7, r2_coherence=0.7)
+    history = [ActionType.HOOK_REWRITE, ActionType.HOOK_REWRITE, ActionType.HOOK_REWRITE]
+    result = aggregator.compute(current, start, history)
+    assert result.anti_gaming_penalty == 0.15
+    assert result.total < 0.7
+def test_aggregator_no_penalty(aggregator):
+    """Varied actions and improving scores incur no penalty."""
+    start = RewardComponents(r1_hook_strength=0.6, r2_coherence=0.6)
+    start.compute_total()
+    current = RewardComponents(r1_hook_strength=0.7, r2_coherence=0.7)
+    history = [ActionType.HOOK_REWRITE, ActionType.CTA_PLACEMENT, ActionType.SECTION_REORDER]
+    result = aggregator.compute(current, start, history)
+    assert result.anti_gaming_penalty == 0.0
+    assert result.total > 0