Spaces:

vajeeda
/

MetaDebate

Sleeping

App Files Files Community

vajeeda committed on Apr 25

Commit

258783b

1 Parent(s): 0290d7a

phase2 implemented

Browse files

Files changed (14) hide show

viral_script_engine/agents/baseline_arbitrator.py +83 -0
viral_script_engine/agents/defender.py +93 -0
viral_script_engine/data/cultural_kb.json +87 -0
viral_script_engine/data/golden_fixtures/fixture_S01.json +25 -25
viral_script_engine/data/golden_fixtures/fixture_S02.json +20 -20
viral_script_engine/environment/env.py +62 -4
viral_script_engine/rewards/r3_cultural_alignment.py +58 -0
viral_script_engine/rewards/r4_debate_resolution.py +82 -0
viral_script_engine/rewards/r5_defender_preservation.py +62 -0
viral_script_engine/rewards/reward_aggregator.py +44 -7
viral_script_engine/scripts/run_baseline.py +217 -0
viral_script_engine/tests/test_environment.py +37 -0
viral_script_engine/tests/test_phase2.py +325 -0
viral_script_engine/tests/test_rewards.py +3 -3

viral_script_engine/agents/baseline_arbitrator.py ADDED Viewed

	@@ -0,0 +1,83 @@

+import json
+from viral_script_engine.agents.llm_backend import LLMBackend
+# System prompt for the baseline arbitrator: asks for exactly one action,
+# returned as a bare JSON object with the five keys shown in the template.
+SYSTEM_PROMPT = """You are helping improve a short-form video script.
+You have observed a debate between a Critic and a Defender about the script.
+Choose ONE action to take to improve the script.
+Available actions: hook_rewrite, section_reorder, cultural_ref_sub, cta_placement
+Respond ONLY with valid JSON:
+{
+  "action_type": "hook_rewrite",
+  "target_section": "hook",
+  "instruction": "specific instruction for the rewriter",
+  "critique_claim_id": "C1",
+  "reasoning": "brief explanation"
+}"""
+# Appended to the user prompt for the second attempt, after the first reply
+# could not be decoded as JSON.
+STRICT_RETRY_SUFFIX = (
+    "\n\nIMPORTANT: Your previous response was not valid JSON. "
+    "Respond ONLY with the raw JSON object. No markdown fences, no explanation, no preamble."
+)
+# Action that BaselineArbitratorAgent.act returns (as a copy) when both
+# attempts fail to produce parseable JSON.
+_FALLBACK_ACTION = {
+    "action_type": "hook_rewrite",
+    "target_section": "hook",
+    "instruction": "Rewrite the hook to be more engaging and direct.",
+    "critique_claim_id": "C1",
+    "reasoning": "Default fallback action.",
+}
+class BaselineArbitratorAgent:
+    """
+    Untrained Arbitrator for the pre-training baseline.
+    Uses zero-shot instruction — no chain-of-thought, no few-shot examples.
+    This ensures the comparison is fair: trained model learns through RL, not prompting.
+    """
+    def __init__(self, backend: str = "groq", model_name: str = "llama-3.3-70b-versatile"):
+        self.llm = LLMBackend(backend=backend, model_name=model_name)
+    def _build_user_prompt(self, observation: dict) -> str:
+        script = observation.get("current_script", "")
+        debate = observation.get("debate_history", [])
+        last_claims = []
+        last_defense = None
+        if debate:
+            last_round = debate[-1]
+            last_claims = last_round.get("critic_claims", [])
+            last_defense = last_round.get("defender_response")
+        claims_text = ""
+        for c in last_claims:
+            claims_text += f"- [{c.get('claim_id','?')}] {c.get('claim_text','')} (severity: {c.get('severity','')})\n"
+        defense_text = ""
+        if last_defense:
+            defense_text = (
+                f"Defender preserved: {last_defense.get('core_strength_quote','')}\n"
+                f"Flagged claims: {last_defense.get('flagged_critic_claims', [])}\n"
+            )
+        return (
+            f"SCRIPT:\n{script}\n\n"
+            f"CRITIC CLAIMS:\n{claims_text or 'None'}\n"
+            f"DEFENDER:\n{defense_text or 'None'}\n\n"
+            "Choose one action to improve the script."
+        )
+    def act(self, observation: dict) -> dict:
+        user_prompt = self._build_user_prompt(observation)
+        raw = self.llm.generate(SYSTEM_PROMPT, user_prompt, max_tokens=512)
+        try:
+            return json.loads(raw)
+        except Exception:
+            strict_prompt = user_prompt + STRICT_RETRY_SUFFIX
+            raw2 = self.llm.generate(SYSTEM_PROMPT, strict_prompt, max_tokens=512)
+            try:
+                return json.loads(raw2)
+            except Exception:
+                return _FALLBACK_ACTION.copy()

viral_script_engine/agents/defender.py ADDED Viewed

	@@ -0,0 +1,93 @@

+import json
+from typing import List
+from pydantic import BaseModel
+from viral_script_engine.agents.llm_backend import LLMBackend
+from viral_script_engine.agents.critic import CritiqueClaim
+# System prompt for the defender: asks the model to name and quote the script's
+# strongest element, flag critic claims, list regional phrases, and reply with a
+# bare JSON object whose keys match the DefenderOutput fields.
+SYSTEM_PROMPT = """You are a script defender for short-form video content. Your job is NOT to say the script is perfect.
+Your job is to identify what is genuinely working — and protect it from being edited away.
+Specifically:
+1. Find the single most powerful element of the script. Quote it exactly.
+2. Explain why a viewer would respond positively to this element.
+3. Review the Critic's claims. Flag any that would destroy the script's core strength or strip its regional authenticity if acted on.
+4. List any phrases, idioms, or references that are intentionally regional — these must not be "corrected" away.
+OUTPUT (JSON only, no preamble):
+{
+  "core_strength": "one sentence describing the strongest element",
+  "core_strength_quote": "exact verbatim quote from the script",
+  "defense_argument": "why this element should be preserved",
+  "flagged_critic_claims": ["C2", "C3"],
+  "regional_voice_elements": ["specific phrase 1", "specific phrase 2"]
+}"""
+# Appended to the user prompt for the second attempt, after the first reply
+# failed to parse or validate.
+STRICT_RETRY_SUFFIX = (
+    "\n\nIMPORTANT: Your previous response was not valid JSON. "
+    "Respond ONLY with the raw JSON object. No markdown fences, no explanation, no preamble."
+)
+class DefenderParseError(Exception):
+    """Raised when the defender's reply still cannot be parsed into a DefenderOutput after the retry."""
+    pass
+class DefenderOutput(BaseModel):
+    """Structured defender reply; the fields mirror the JSON object requested in SYSTEM_PROMPT."""
+    # One sentence describing the script's strongest element (per the prompt template).
+    core_strength: str
+    # Quote of that element; the prompt asks for it verbatim from the script.
+    core_strength_quote: str
+    # Why the element should be preserved.
+    defense_argument: str
+    # Critic claim IDs the defender flags, e.g. "C2".
+    flagged_critic_claims: List[str]
+    # Phrases the defender lists as intentionally regional.
+    regional_voice_elements: List[str]
+class DefenderAgent:
+    def __init__(self, backend: str = "groq", model_name: str = "llama-3.3-70b-versatile"):
+        self.llm = LLMBackend(backend=backend, model_name=model_name)
+    def _build_user_prompt(
+        self,
+        script: str,
+        critic_claims: List[CritiqueClaim],
+        region: str,
+        platform: str,
+    ) -> str:
+        claims_lines = []
+        for i, claim in enumerate(critic_claims, start=1):
+            claims_lines.append(
+                f"{i}. [{claim.claim_id}] ({claim.critique_class}) {claim.claim_text} | Evidence: {claim.evidence}"
+            )
+        claims_block = "\n".join(claims_lines) if claims_lines else "No critic claims provided."
+        return (
+            f"SCRIPT:\n{script}\n\n"
+            f"CRITIC CLAIMS:\n{claims_block}\n\n"
+            f"REGION: {region}\n"
+            f"PLATFORM: {platform}\n\n"
+            "Defend the script now."
+        )
+    def _parse_response(self, raw: str, user_prompt: str) -> DefenderOutput:
+        try:
+            data = json.loads(raw)
+            return DefenderOutput(**data)
+        except Exception:
+            strict_prompt = user_prompt + STRICT_RETRY_SUFFIX
+            raw2 = self.llm.generate(SYSTEM_PROMPT, strict_prompt, max_tokens=1024)
+            try:
+                data = json.loads(raw2)
+                return DefenderOutput(**data)
+            except Exception as e:
+                raise DefenderParseError(f"Failed to parse defender output after 2 attempts: {e}")
+    def defend(
+        self,
+        script: str,
+        critic_claims: List[CritiqueClaim],
+        region: str,
+        platform: str,
+    ) -> DefenderOutput:
+        user_prompt = self._build_user_prompt(script, critic_claims, region, platform)
+        raw = self.llm.generate(SYSTEM_PROMPT, user_prompt, max_tokens=1024)
+        return self._parse_response(raw, user_prompt)

viral_script_engine/data/cultural_kb.json ADDED Viewed

	@@ -0,0 +1,87 @@

+{
+  "mumbai_gen_z": {
+    "valid_refs": [
+      "Bandra", "CSMT", "dabba", "auto", "local train", "startup scene",
+      "Zomato", "Swiggy", "IPL", "sea link", "Juhu", "Versova", "Aarey",
+      "SoBo", "Powai", "Andheri", "Dharavi", "Bandstand", "Colaba",
+      "Bollywood 2020s"
+    ],
+    "invalid_signals": [
+      "trunk call", "STD booth", "pager", "cassette", "VHS", "walkman",
+      "disco", "beeper", "doordarshan only", "telegram boy",
+      "ration card queue", "old delhi", "tonga", "coolie",
+      "chawl gossip pre-2000"
+    ],
+    "correct_idioms": [
+      "ek dum solid", "full on", "kya scene hai", "bindaas", "jugaad nahi",
+      "yaar sun", "bhai chill maar", "ekdum mast", "kya baat hai",
+      "ekdum lit", "arey yaar", "full on vibe", "kya scene hai bhai",
+      "timepass nahi", "mast hai re", "solid plan", "chal be"
+    ],
+    "anachronistic_signals": []
+  },
+  "tier2_hindi_belt": {
+    "valid_refs": [
+      "kirana store", "mandap", "jugaad", "sabzi mandi", "panchayat",
+      "mela", "dal-chawal", "halwai", "tehsil", "chaupal", "gaon",
+      "kheti", "bori", "dhaba", "cycle", "tubewell", "Kanpur",
+      "Lucknow", "Patna", "Varanasi"
+    ],
+    "invalid_signals": [
+      "unicorn startup", "runway", "pivot", "SaaS", "venture capital",
+      "IPO", "fintech", "coworking space", "metro commute", "craft beer",
+      "avocado toast", "brunch", "penthouse", "valet parking",
+      "rooftop lounge", "angel investor", "Series A", "growth hacking",
+      "disruptive"
+    ],
+    "correct_idioms": [
+      "bilkul sahi", "arey bhai", "baat pakki", "suno ji", "haan ji",
+      "seedha baat", "desi style", "apna kaam", "ek number",
+      "bahut badhiya", "kya baat", "sahi mein", "dhamaal", "arre wah",
+      "mast jugaad", "thoda adjust karo", "kal milte hain"
+    ],
+    "anachronistic_signals": []
+  },
+  "pan_india_english": {
+    "valid_refs": [
+      "India", "Indian", "subcontinent", "festivals", "cricket", "desi",
+      "masala", "biryani", "chai", "temple", "monsoon", "Diwali",
+      "Holi", "market", "college"
+    ],
+    "invalid_signals": [
+      "kachcha road", "pind", "naali", "akhada", "maidan ke paas",
+      "chakki", "chulha", "dung cake", "lathicharge", "tehsildar",
+      "patwari", "sarpanch only", "gaon waale", "pradhan ji", "khet mein"
+    ],
+    "correct_idioms": [
+      "absolutely", "for sure", "totally agree", "makes sense",
+      "of course", "right", "exactly", "fair point", "well said",
+      "spot on", "valid point", "true that", "no doubt", "solid",
+      "good point", "great insight", "well done"
+    ],
+    "anachronistic_signals": []
+  },
+  "hinglish": {
+    "valid_refs": [
+      "yaar", "bhai", "dost", "scene", "vibe", "timepass", "jugaad",
+      "mast", "bindaas", "desi", "ekdum", "kal", "aaj", "kya", "kaise"
+    ],
+    "invalid_signals": [
+      "I am going to the marketplace", "Please be advised", "Pursuant to",
+      "Herewith", "As per our discussion", "In reference to",
+      "Kindly note", "I beg to inform", "With due respect",
+      "at your earliest convenience", "enclosed herewith",
+      "please find attached", "as discussed", "further to my email",
+      "I hope this email finds you"
+    ],
+    "correct_idioms": [
+      "yaar sun", "bhai ekdum", "kya baat hai", "arey chill maar",
+      "solid plan hai", "mast scene hai", "full on enjoy",
+      "ekdum bindaas", "bhai sahi bol raha hai", "kya jugaad lagaya",
+      "ek dum mast", "timepass nahi yaar", "kya vibe hai",
+      "full on mast", "ekdum sahi", "arey yaar kya scene",
+      "bas ek chance"
+    ],
+    "anachronistic_signals": []
+  }
+}

viral_script_engine/data/golden_fixtures/fixture_S01.json CHANGED Viewed

@@ -8,59 +8,59 @@
       {
         "claim_id": "C1",
         "critique_class": "hook_weakness",
-        "claim_text": "The hook at 0:00-0:10 promises a life-changing secret but delays its reveal until 0:45, by which time some viewers may have lost interest",
-        "timestamp_range": "0:00-0:10",
-        "evidence": "Okay so real talk \u00e2\u20ac\u201d I've been broke my whole life. Like actually broke. Not the aesthetic broke, the can't-pay-rent broke. And then one day I found this one trick that changed everything.",
         "is_falsifiable": true,
         "severity": "medium"
       },
       {
         "claim_id": "C2",
         "critique_class": "pacing_issue",
-        "claim_text": "The script spends 15 seconds showing off the apartment at 0:20-0:35, which may slow down the narrative and cause some viewers to lose interest",
-        "timestamp_range": "0:20-0:35",
-        "evidence": "But first, let me show you my apartment. Pretty nice right? Took me three years to get here.",
         "is_falsifiable": true,
-        "severity": "low"
       },
       {
         "claim_id": "C3",
-        "critique_class": "coherence_break",
-        "claim_text": "The transition from 'I've been broke my whole life' to 'my apartment is pretty nice' at 0:10-0:20 is abrupt and may confuse some viewers",
-        "timestamp_range": "0:10-0:20",
-        "evidence": "Like actually broke. Not the aesthetic broke, the can't-pay-rent broke. And then one day I found this one trick that changed everything. But first, let me show you my apartment.",
         "is_falsifiable": true,
         "severity": "medium"
       },
       {
         "claim_id": "C4",
-        "critique_class": "cta_buried",
-        "claim_text": "The call-to-action to follow the creator for more information is buried at the end of the script at 1:00-1:05 and may be missed by viewers who don't watch until the end",
-        "timestamp_range": "1:00-1:05",
         "evidence": "If you want to know which funds I use, follow me and I'll post the list tomorrow.",
         "is_falsifiable": true,
         "severity": "high"
       },
       {
         "claim_id": "C5",
-        "critique_class": "retention_risk",
-        "claim_text": "The script assumes the viewer is familiar with mutual funds and SIPs at 0:40-0:50, which may cause some viewers to feel lost or disconnected from the content",
-        "timestamp_range": "0:40-0:50",
-        "evidence": "The secret? Mutual funds. Just SIPs.",
         "is_falsifiable": true,
         "severity": "medium"
       },
       {
         "claim_id": "C6",
-        "critique_class": "cultural_mismatch",
-        "claim_text": "The script uses a very casual tone at 0:00-0:10, which may not resonate with all viewers in the target region, particularly those who prefer more formal or professional content",
-        "timestamp_range": "0:00-0:10",
-        "evidence": "Okay so real talk \u00e2\u20ac\u201d I've been broke my whole life.",
         "is_falsifiable": true,
-        "severity": "low"
       }
     ],
-    "overall_severity": "medium",
-    "raw_response": "{\n  \"claims\": [\n    {\n      \"claim_id\": \"C1\",\n      \"critique_class\": \"hook_weakness\",\n      \"claim_text\": \"The hook at 0:00-0:10 promises a life-changing secret but delays its reveal until 0:45, by which time some viewers may have lost interest\",\n      \"timestamp_range\": \"0:00-0:10\",\n      \"evidence\": \"Okay so real talk \u00e2\u20ac\u201d I've been broke my whole life. Like actually broke. Not the aesthetic broke, the can't-pay-rent broke. And then one day I found this one trick that changed everything.\",\n      \"is_falsifiable\": true,\n      \"severity\": \"medium\"\n    },\n    {\n      \"claim_id\": \"C2\",\n      \"critique_class\": \"pacing_issue\",\n      \"claim_text\": \"The script spends 15 seconds showing off the apartment at 0:20-0:35, which may slow down the narrative and cause some viewers to lose interest\",\n      \"timestamp_range\": \"0:20-0:35\",\n      \"evidence\": \"But first, let me show you my apartment. Pretty nice right? Took me three years to get here.\",\n      \"is_falsifiable\": true,\n      \"severity\": \"low\"\n    },\n    {\n      \"claim_id\": \"C3\",\n      \"critique_class\": \"coherence_break\",\n      \"claim_text\": \"The transition from 'I've been broke my whole life' to 'my apartment is pretty nice' at 0:10-0:20 is abrupt and may confuse some viewers\",\n      \"timestamp_range\": \"0:10-0:20\",\n      \"evidence\": \"Like actually broke. Not the aesthetic broke, the can't-pay-rent broke. And then one day I found this one trick that changed everything. 
But first, let me show you my apartment.\",\n      \"is_falsifiable\": true,\n      \"severity\": \"medium\"\n    },\n    {\n      \"claim_id\": \"C4\",\n      \"critique_class\": \"cta_buried\",\n      \"claim_text\": \"The call-to-action to follow the creator for more information is buried at the end of the script at 1:00-1:05 and may be missed by viewers who don't watch until the end\",\n      \"timestamp_range\": \"1:00-1:05\",\n      \"evidence\": \"If you want to know which funds I use, follow me and I'll post the list tomorrow.\",\n      \"is_falsifiable\": true,\n      \"severity\": \"high\"\n    },\n    {\n      \"claim_id\": \"C5\",\n      \"critique_class\": \"retention_risk\",\n      \"claim_text\": \"The script assumes the viewer is familiar with mutual funds and SIPs at 0:40-0:50, which may cause some viewers to feel lost or disconnected from the content\",\n      \"timestamp_range\": \"0:40-0:50\",\n      \"evidence\": \"The secret? Mutual funds. Just SIPs.\",\n      \"is_falsifiable\": true,\n      \"severity\": \"medium\"\n    },\n    {\n      \"claim_id\": \"C6\",\n      \"critique_class\": \"cultural_mismatch\",\n      \"claim_text\": \"The script uses a very casual tone at 0:00-0:10, which may not resonate with all viewers in the target region, particularly those who prefer more formal or professional content\",\n      \"timestamp_range\": \"0:00-0:10\",\n      \"evidence\": \"Okay so real talk \u00e2\u20ac\u201d I've been broke my whole life.\",\n      \"is_falsifiable\": true,\n      \"severity\": \"low\"\n    }\n  ],\n  \"overall_severity\": \"medium\"\n}"
   }
 }

       {
         "claim_id": "C1",
         "critique_class": "hook_weakness",
+        "claim_text": "The hook promises a life-changing trick but takes 0:22 seconds to reveal, by which time viewers might have lost interest",
+        "timestamp_range": "0:00-0:03",
+        "evidence": "I've been broke my whole life. Like actually broke. Not the aesthetic broke, the can't-pay-rent broke. And then one day I found this one trick that changed everything.",
         "is_falsifiable": true,
         "severity": "medium"
       },
       {
         "claim_id": "C2",
         "critique_class": "pacing_issue",
+        "claim_text": "The script switches abruptly from the personal story to a promotional message at 0:45, disrupting the narrative flow",
+        "timestamp_range": "0:45-0:55",
+        "evidence": "The secret? Mutual funds. Just SIPs. I'm serious.",
         "is_falsifiable": true,
+        "severity": "high"
       },
       {
         "claim_id": "C3",
+        "critique_class": "cta_buried",
+        "claim_text": "The call-to-action to follow the creator for the list of funds is buried at the end of the script and might be missed by viewers",
+        "timestamp_range": "1:10-1:15",
+        "evidence": "If you want to know which funds I use, follow me and I'll post the list tomorrow.",
         "is_falsifiable": true,
         "severity": "medium"
       },
       {
         "claim_id": "C4",
+        "critique_class": "retention_risk",
+        "claim_text": "The script assumes viewers will wait for the next day's post for the list of funds, which might lead to a drop in retention",
+        "timestamp_range": "1:10-1:15",
         "evidence": "If you want to know which funds I use, follow me and I'll post the list tomorrow.",
         "is_falsifiable": true,
         "severity": "high"
       },
       {
         "claim_id": "C5",
+        "critique_class": "coherence_break",
+        "claim_text": "The script's tone shifts from relatable and personal to overly promotional and sales-y, potentially alienating the target audience",
+        "timestamp_range": "N/A",
+        "evidence": "The entire script's tone and language",
         "is_falsifiable": true,
         "severity": "medium"
       },
       {
         "claim_id": "C6",
+        "critique_class": "pacing_issue",
+        "claim_text": "The script tries to cram too much information and calls-to-action in the last 10 seconds, which might overwhelm viewers",
+        "timestamp_range": "1:15-1:20",
+        "evidence": "Like and save this video before Instagram hides it.",
         "is_falsifiable": true,
+        "severity": "high"
       }
     ],
+    "overall_severity": "high",
+    "raw_response": "{\n  \"claims\": [\n    {\n      \"claim_id\": \"C1\",\n      \"critique_class\": \"hook_weakness\",\n      \"claim_text\": \"The hook promises a life-changing trick but takes 0:22 seconds to reveal, by which time viewers might have lost interest\",\n      \"timestamp_range\": \"0:00-0:03\",\n      \"evidence\": \"I've been broke my whole life. Like actually broke. Not the aesthetic broke, the can't-pay-rent broke. And then one day I found this one trick that changed everything.\",\n      \"is_falsifiable\": true,\n      \"severity\": \"medium\"\n    },\n    {\n      \"claim_id\": \"C2\",\n      \"critique_class\": \"pacing_issue\",\n      \"claim_text\": \"The script switches abruptly from the personal story to a promotional message at 0:45, disrupting the narrative flow\",\n      \"timestamp_range\": \"0:45-0:55\",\n      \"evidence\": \"The secret? Mutual funds. Just SIPs. I'm serious.\",\n      \"is_falsifiable\": true,\n      \"severity\": \"high\"\n    },\n    {\n      \"claim_id\": \"C3\",\n      \"critique_class\": \"cta_buried\",\n      \"claim_text\": \"The call-to-action to follow the creator for the list of funds is buried at the end of the script and might be missed by viewers\",\n      \"timestamp_range\": \"1:10-1:15\",\n      \"evidence\": \"If you want to know which funds I use, follow me and I'll post the list tomorrow.\",\n      \"is_falsifiable\": true,\n      \"severity\": \"medium\"\n    },\n    {\n      \"claim_id\": \"C4\",\n      \"critique_class\": \"retention_risk\",\n      \"claim_text\": \"The script assumes viewers will wait for the next day's post for the list of funds, which might lead to a drop in retention\",\n      \"timestamp_range\": \"1:10-1:15\",\n      \"evidence\": \"If you want to know which funds I use, follow me and I'll post the list tomorrow.\",\n      \"is_falsifiable\": true,\n      \"severity\": \"high\"\n    },\n    {\n      \"claim_id\": \"C5\",\n      \"critique_class\": 
\"coherence_break\",\n      \"claim_text\": \"The script's tone shifts from relatable and personal to overly promotional and sales-y, potentially alienating the target audience\",\n      \"timestamp_range\": \"N/A\",\n      \"evidence\": \"The entire script's tone and language\",\n      \"is_falsifiable\": true,\n      \"severity\": \"medium\"\n    },\n    {\n      \"claim_id\": \"C6\",\n      \"critique_class\": \"pacing_issue\",\n      \"claim_text\": \"The script tries to cram too much information and calls-to-action in the last 10 seconds, which might overwhelm viewers\",\n      \"timestamp_range\": \"1:15-1:20\",\n      \"evidence\": \"Like and save this video before Instagram hides it.\",\n      \"is_falsifiable\": true,\n      \"severity\": \"high\"\n    }\n  ],\n  \"overall_severity\": \"high\"\n}"
   }
 }

viral_script_engine/data/golden_fixtures/fixture_S02.json CHANGED Viewed

@@ -8,59 +8,59 @@
       {
         "claim_id": "C1",
         "critique_class": "hook_weakness",
-        "claim_text": "The hook at 0:00-0:03 promises five outfits for one thousand rupees, but the reveal at 2:04-2:10 shows the total cost is five hundred rupees, which may be confusing for viewers who were expecting to see how to create outfits for one thousand rupees",
-        "timestamp_range": "0:00-0:03, 2:04-2:10",
-        "evidence": "Five outfits, one thousand rupees. ... Grand total \u00e2\u20ac\u201d five hundred rupees for five outfits.",
         "is_falsifiable": true,
         "severity": "medium"
       },
       {
         "claim_id": "C2",
         "critique_class": "pacing_issue",
-        "claim_text": "The script at 0:44-0:56 has a pacing issue where the creator says 'wait I need to find it. Okay found it.' which disrupts the flow of showcasing outfits",
-        "timestamp_range": "0:44-0:56",
-        "evidence": "Outfit three \u00e2\u20ac\u201d wait I need to find it. Okay found it.",
         "is_falsifiable": true,
         "severity": "low"
       },
       {
         "claim_id": "C3",
         "critique_class": "retention_risk",
-        "claim_text": "The script at 2:20-2:30 has a retention risk where the creator asks viewers to comment their city, which may not be engaging enough to retain viewers' interest",
-        "timestamp_range": "2:20-2:30",
-        "evidence": "Comment your city and I'll do a version for your local markets.",
         "is_falsifiable": true,
         "severity": "medium"
       },
       {
         "claim_id": "C4",
-        "critique_class": "coherence_break",
-        "claim_text": "The script at 1:46-1:54 has a coherence break where the creator abruptly shifts from showcasing outfits to talking about a saree drape tutorial",
-        "timestamp_range": "1:46-1:54",
-        "evidence": "Outfit five \u00e2\u20ac\u201d this entire saree drape tutorial took me two hours",
         "is_falsifiable": true,
         "severity": "high"
       },
       {
         "claim_id": "C5",
-        "critique_class": "cta_buried",
-        "claim_text": "The script buries the call-to-action at 2:20-2:30, where the creator asks viewers to comment their city, but it's not prominent enough",
-        "timestamp_range": "2:20-2:30",
-        "evidence": "Comment your city and I'll do a version for your local markets.",
         "is_falsifiable": true,
         "severity": "medium"
       },
       {
         "claim_id": "C6",
         "critique_class": "cultural_mismatch",
-        "claim_text": "The script assumes all viewers are familiar with Mumbai-based markets like Linking Road and Sarojini Nagar, which may not be relatable to viewers from other regions",
         "timestamp_range": "N/A",
-        "evidence": "Linking Road, Sarojini Nagar",
         "is_falsifiable": true,
         "severity": "low"
       }
     ],
     "overall_severity": "medium",
-    "raw_response": "{\n  \"claims\": [\n    {\n      \"claim_id\": \"C1\",\n      \"critique_class\": \"hook_weakness\",\n      \"claim_text\": \"The hook at 0:00-0:03 promises five outfits for one thousand rupees, but the reveal at 2:04-2:10 shows the total cost is five hundred rupees, which may be confusing for viewers who were expecting to see how to create outfits for one thousand rupees\",\n      \"timestamp_range\": \"0:00-0:03, 2:04-2:10\",\n      \"evidence\": \"Five outfits, one thousand rupees. ... Grand total \u00e2\u20ac\u201d five hundred rupees for five outfits.\",\n      \"is_falsifiable\": true,\n      \"severity\": \"medium\"\n    },\n    {\n      \"claim_id\": \"C2\",\n      \"critique_class\": \"pacing_issue\",\n      \"claim_text\": \"The script at 0:44-0:56 has a pacing issue where the creator says 'wait I need to find it. Okay found it.' which disrupts the flow of showcasing outfits\",\n      \"timestamp_range\": \"0:44-0:56\",\n      \"evidence\": \"Outfit three \u00e2\u20ac\u201d wait I need to find it. 
Okay found it.\",\n      \"is_falsifiable\": true,\n      \"severity\": \"low\"\n    },\n    {\n      \"claim_id\": \"C3\",\n      \"critique_class\": \"retention_risk\",\n      \"claim_text\": \"The script at 2:20-2:30 has a retention risk where the creator asks viewers to comment their city, which may not be engaging enough to retain viewers' interest\",\n      \"timestamp_range\": \"2:20-2:30\",\n      \"evidence\": \"Comment your city and I'll do a version for your local markets.\",\n      \"is_falsifiable\": true,\n      \"severity\": \"medium\"\n    },\n    {\n      \"claim_id\": \"C4\",\n      \"critique_class\": \"coherence_break\",\n      \"claim_text\": \"The script at 1:46-1:54 has a coherence break where the creator abruptly shifts from showcasing outfits to talking about a saree drape tutorial\",\n      \"timestamp_range\": \"1:46-1:54\",\n      \"evidence\": \"Outfit five \u00e2\u20ac\u201d this entire saree drape tutorial took me two hours\",\n      \"is_falsifiable\": true,\n      \"severity\": \"high\"\n    },\n    {\n      \"claim_id\": \"C5\",\n      \"critique_class\": \"cta_buried\",\n      \"claim_text\": \"The script buries the call-to-action at 2:20-2:30, where the creator asks viewers to comment their city, but it's not prominent enough\",\n      \"timestamp_range\": \"2:20-2:30\",\n      \"evidence\": \"Comment your city and I'll do a version for your local markets.\",\n      \"is_falsifiable\": true,\n      \"severity\": \"medium\"\n    },\n    {\n      \"claim_id\": \"C6\",\n      \"critique_class\": \"cultural_mismatch\",\n      \"claim_text\": \"The script assumes all viewers are familiar with Mumbai-based markets like Linking Road and Sarojini Nagar, which may not be relatable to viewers from other regions\",\n      \"timestamp_range\": \"N/A\",\n      \"evidence\": \"Linking Road, Sarojini Nagar\",\n      \"is_falsifiable\": true,\n      \"severity\": \"low\"\n    }\n  ],\n  \"overall_severity\": \"medium\"\n}"
   }
 }

       {
         "claim_id": "C1",
         "critique_class": "hook_weakness",
+        "claim_text": "The hook at 0:00-0:03 promises five outfits for one thousand rupees, but the reveal at 1:45 shows the total cost is five hundred rupees, which may confuse viewers",
+        "timestamp_range": "0:00-0:03, 1:45",
+        "evidence": "Five outfits, one thousand rupees",
         "is_falsifiable": true,
         "severity": "medium"
       },
       {
         "claim_id": "C2",
         "critique_class": "pacing_issue",
+        "claim_text": "The script has an abrupt pause at 0:45-0:50 where the creator says 'wait I need to find it. Okay found it.', which disrupts the flow of the video",
+        "timestamp_range": "0:45-0:50",
+        "evidence": "wait I need to find it. Okay found it.",
         "is_falsifiable": true,
         "severity": "low"
       },
       {
         "claim_id": "C3",
         "critique_class": "retention_risk",
+        "claim_text": "The creator asks viewers to 'save this video' at 1:20-1:25, but this call-to-action may not be compelling enough to encourage viewers to take action",
+        "timestamp_range": "1:20-1:25",
+        "evidence": "this entire saree drape tutorial took me two hours so please save this video",
         "is_falsifiable": true,
         "severity": "medium"
       },
       {
         "claim_id": "C4",
+        "critique_class": "cta_buried",
+        "claim_text": "The call-to-action 'Comment your city and I'll do a version for your local markets' is buried at the end of the script at 1:50-2:00, which may be missed by viewers who drop off early",
+        "timestamp_range": "1:50-2:00",
+        "evidence": "Comment your city and I'll do a version for your local markets",
         "is_falsifiable": true,
         "severity": "high"
       },
       {
         "claim_id": "C5",
+        "critique_class": "coherence_break",
+        "claim_text": "The script jumps abruptly from showcasing outfits to providing a grand total at 1:45, without a clear transition or summary of the previous outfits",
+        "timestamp_range": "1:40-1:50",
+        "evidence": "Grand total \u00e2\u20ac\u201d five hundred rupees for five outfits",
         "is_falsifiable": true,
         "severity": "medium"
       },
       {
         "claim_id": "C6",
         "critique_class": "cultural_mismatch",
+        "claim_text": "The script assumes that all viewers are familiar with Mumbai's local markets and terms like 'Linking Road' and 'Sarojini Nagar', which may not resonate with viewers from other regions",
         "timestamp_range": "N/A",
+        "evidence": "thrifted kurta from Linking Road, forty rupees, styled with mom's old dupatta, zero rupees",
         "is_falsifiable": true,
         "severity": "low"
       }
     ],
     "overall_severity": "medium",
+    "raw_response": "{\n  \"claims\": [\n    {\n      \"claim_id\": \"C1\",\n      \"critique_class\": \"hook_weakness\",\n      \"claim_text\": \"The hook at 0:00-0:03 promises five outfits for one thousand rupees, but the reveal at 1:45 shows the total cost is five hundred rupees, which may confuse viewers\",\n      \"timestamp_range\": \"0:00-0:03, 1:45\",\n      \"evidence\": \"Five outfits, one thousand rupees\",\n      \"is_falsifiable\": true,\n      \"severity\": \"medium\"\n    },\n    {\n      \"claim_id\": \"C2\",\n      \"critique_class\": \"pacing_issue\",\n      \"claim_text\": \"The script has an abrupt pause at 0:45-0:50 where the creator says 'wait I need to find it. Okay found it.', which disrupts the flow of the video\",\n      \"timestamp_range\": \"0:45-0:50\",\n      \"evidence\": \"wait I need to find it. Okay found it.\",\n      \"is_falsifiable\": true,\n      \"severity\": \"low\"\n    },\n    {\n      \"claim_id\": \"C3\",\n      \"critique_class\": \"retention_risk\",\n      \"claim_text\": \"The creator asks viewers to 'save this video' at 1:20-1:25, but this call-to-action may not be compelling enough to encourage viewers to take action\",\n      \"timestamp_range\": \"1:20-1:25\",\n      \"evidence\": \"this entire saree drape tutorial took me two hours so please save this video\",\n      \"is_falsifiable\": true,\n      \"severity\": \"medium\"\n    },\n    {\n      \"claim_id\": \"C4\",\n      \"critique_class\": \"cta_buried\",\n      \"claim_text\": \"The call-to-action 'Comment your city and I'll do a version for your local markets' is buried at the end of the script at 1:50-2:00, which may be missed by viewers who drop off early\",\n      \"timestamp_range\": \"1:50-2:00\",\n      \"evidence\": \"Comment your city and I'll do a version for your local markets\",\n      \"is_falsifiable\": true,\n      \"severity\": \"high\"\n    },\n    {\n      \"claim_id\": \"C5\",\n      \"critique_class\": \"coherence_break\",\n      
\"claim_text\": \"The script jumps abruptly from showcasing outfits to providing a grand total at 1:45, without a clear transition or summary of the previous outfits\",\n      \"timestamp_range\": \"1:40-1:50\",\n      \"evidence\": \"Grand total \u00e2\u20ac\u201d five hundred rupees for five outfits\",\n      \"is_falsifiable\": true,\n      \"severity\": \"medium\"\n    },\n    {\n      \"claim_id\": \"C6\",\n      \"critique_class\": \"cultural_mismatch\",\n      \"claim_text\": \"The script assumes that all viewers are familiar with Mumbai's local markets and terms like 'Linking Road' and 'Sarojini Nagar', which may not resonate with viewers from other regions\",\n      \"timestamp_range\": \"N/A\",\n      \"evidence\": \"thrifted kurta from Linking Road, forty rupees, styled with mom's old dupatta, zero rupees\",\n      \"is_falsifiable\": true,\n      \"severity\": \"low\"\n    }\n  ],\n  \"overall_severity\": \"medium\"\n}"
   }
 }

viral_script_engine/environment/env.py CHANGED Viewed

@@ -3,6 +3,7 @@ import random
 from typing import Optional, Tuple
 from viral_script_engine.agents.critic import CriticAgent
 from viral_script_engine.agents.rewriter import RewriterAgent
 from viral_script_engine.environment.actions import ArbitratorAction
 from viral_script_engine.environment.episode_state import EpisodeState
@@ -11,6 +12,9 @@ from viral_script_engine.environment.observations import (
 )
 from viral_script_engine.rewards.r1_hook_strength import HookStrengthReward
 from viral_script_engine.rewards.r2_coherence import CoherenceReward
 from viral_script_engine.rewards.reward_aggregator import RewardAggregator
 _TIERS = {
@@ -28,6 +32,7 @@ class ViralScriptEnv:
         max_steps: int = 5,
         difficulty: str = "easy",
         use_anti_gaming: bool = True,
     ):
         self.max_steps = max_steps
         self.difficulty = difficulty
@@ -39,9 +44,13 @@ class ViralScriptEnv:
         tier_ids = _TIERS[difficulty]
         self._scripts = [s for s in all_scripts if s["script_id"] in tier_ids]
         self.critic = CriticAgent()
         self.rewriter = RewriterAgent()
         self.r1 = HookStrengthReward()
         self.r2 = CoherenceReward()
         self.aggregator = RewardAggregator()
         self._state: Optional[EpisodeState] = None
@@ -52,9 +61,11 @@ class ViralScriptEnv:
         r1_result = self.r1.score(script["script_text"])
         r2_result = self.r2.score(script["script_text"], script["script_text"])
         initial_rewards = RewardComponents(
             r1_hook_strength=r1_result.score,
             r2_coherence=r2_result.score,
         )
         initial_rewards.compute_total()
@@ -79,27 +90,68 @@ class ViralScriptEnv:
             niche=self._state.niche,
         )
         rewrite_result = self.rewriter.rewrite(self._state.current_script, arb_action)
         new_script = rewrite_result.rewritten_script
         r1_result = self.r1.score(new_script)
         r2_result = self.r2.score(self._state.original_script, new_script)
         components = RewardComponents(
             r1_hook_strength=r1_result.score,
             r2_coherence=r2_result.score,
         )
         self._state.action_history.append(arb_action.action_type)
         if self.use_anti_gaming:
-            components = self.aggregator.compute(
-                components, self._state.episode_start_rewards, self._state.action_history
             )
         else:
             components.compute_total()
         round_ = DebateRound(
             step_num=self._state.step_num,
             critic_claims=critique.claims,
             arbitrator_action=arb_action,
             rewrite_diff=rewrite_result.diff,
             reward_components=components,
@@ -109,14 +161,19 @@ class ViralScriptEnv:
         self._state.last_reward_components = components
         self._state.step_num += 1
         terminated = (
             self._state.step_num >= self._state.max_steps
             or components.total >= 0.9
         )
         info = {
             "reward_components": components.model_dump(),
-            "anti_gaming_triggered": components.anti_gaming_penalty > 0,
-            "penalty_reason": "anti_gaming" if components.anti_gaming_penalty > 0 else None,
         }
         return self._build_observation().model_dump(), components.total, terminated, False, info
@@ -132,6 +189,7 @@ class ViralScriptEnv:
             "step_num": s.step_num,
             "difficulty_level": s.difficulty_level,
             "episode_id": s.episode_id,
         }
     def _build_observation(self) -> Observation:

 from typing import Optional, Tuple
 from viral_script_engine.agents.critic import CriticAgent
+from viral_script_engine.agents.defender import DefenderAgent
 from viral_script_engine.agents.rewriter import RewriterAgent
 from viral_script_engine.environment.actions import ArbitratorAction
 from viral_script_engine.environment.episode_state import EpisodeState
 )
 from viral_script_engine.rewards.r1_hook_strength import HookStrengthReward
 from viral_script_engine.rewards.r2_coherence import CoherenceReward
+from viral_script_engine.rewards.r3_cultural_alignment import CulturalAlignmentReward
+from viral_script_engine.rewards.r4_debate_resolution import DebateResolutionReward
+from viral_script_engine.rewards.r5_defender_preservation import DefenderPreservationReward
 from viral_script_engine.rewards.reward_aggregator import RewardAggregator
 _TIERS = {
         max_steps: int = 5,
         difficulty: str = "easy",
         use_anti_gaming: bool = True,
+        cultural_kb_path: str = "data/cultural_kb.json",
     ):
         self.max_steps = max_steps
         self.difficulty = difficulty
         tier_ids = _TIERS[difficulty]
         self._scripts = [s for s in all_scripts if s["script_id"] in tier_ids]
         self.critic = CriticAgent()
+        self.defender = DefenderAgent()
         self.rewriter = RewriterAgent()
         self.r1 = HookStrengthReward()
         self.r2 = CoherenceReward()
+        self.r3 = CulturalAlignmentReward(knowledge_base_path=cultural_kb_path)
+        self.r4 = DebateResolutionReward(critic_agent=self.critic)
+        self.r5 = DefenderPreservationReward()
         self.aggregator = RewardAggregator()
         self._state: Optional[EpisodeState] = None
         r1_result = self.r1.score(script["script_text"])
         r2_result = self.r2.score(script["script_text"], script["script_text"])
+        r3_result = self.r3.score(script["script_text"], script.get("region", "pan_india_english"))
         initial_rewards = RewardComponents(
             r1_hook_strength=r1_result.score,
             r2_coherence=r2_result.score,
+            r3_cultural_alignment=r3_result.score,
         )
         initial_rewards.compute_total()
             niche=self._state.niche,
         )
+        defender_output = self.defender.defend(
+            script=self._state.current_script,
+            critic_claims=critique.claims,
+            region=self._state.region,
+            platform=self._state.platform,
+        )
         rewrite_result = self.rewriter.rewrite(self._state.current_script, arb_action)
         new_script = rewrite_result.rewritten_script
         r1_result = self.r1.score(new_script)
         r2_result = self.r2.score(self._state.original_script, new_script)
+        r3_result = self.r3.score(new_script, self._state.region)
+        targeted_claim = next(
+            (c for c in critique.claims if c.claim_id == arb_action.critique_claim_id),
+            critique.claims[0] if critique.claims else None,
+        )
+        r4_result = self.r4.score(
+            new_script=new_script,
+            original_action=arb_action,
+            original_claim=targeted_claim,
+            region=self._state.region,
+            platform=self._state.platform,
+            niche=self._state.niche,
+        ) if targeted_claim else None
+        r5_result = self.r5.score(defender_output, new_script)
         components = RewardComponents(
             r1_hook_strength=r1_result.score,
             r2_coherence=r2_result.score,
+            r3_cultural_alignment=r3_result.score,
+            r4_debate_resolution=r4_result.score if r4_result else None,
+            r5_defender_preservation=r5_result.score,
         )
         self._state.action_history.append(arb_action.action_type)
         if self.use_anti_gaming:
+            components, anti_log = self.aggregator.compute(
+                components,
+                self._state.episode_start_rewards,
+                self._state.action_history,
+                episode_id=self._state.episode_id,
+                step_num=self._state.step_num,
             )
         else:
             components.compute_total()
+            from viral_script_engine.rewards.reward_aggregator import AntiGamingLog
+            anti_log = AntiGamingLog(
+                episode_id=self._state.episode_id,
+                step_num=self._state.step_num,
+                triggered=False,
+                penalty_applied=0.0,
+                pre_penalty_total=components.total,
+                post_penalty_total=components.total,
+            )
         round_ = DebateRound(
             step_num=self._state.step_num,
             critic_claims=critique.claims,
+            defender_response=defender_output.model_dump(),
             arbitrator_action=arb_action,
             rewrite_diff=rewrite_result.diff,
             reward_components=components,
         self._state.last_reward_components = components
         self._state.step_num += 1
+        if not hasattr(self._state, "anti_gaming_logs"):
+            self._state.anti_gaming_logs = []
+        self._state.anti_gaming_logs.append(anti_log.model_dump())
         terminated = (
             self._state.step_num >= self._state.max_steps
             or components.total >= 0.9
         )
         info = {
             "reward_components": components.model_dump(),
+            "anti_gaming_triggered": anti_log.triggered,
+            "penalty_reason": anti_log.rule_triggered,
+            "anti_gaming_log": anti_log.model_dump(),
         }
         return self._build_observation().model_dump(), components.total, terminated, False, info
             "step_num": s.step_num,
             "difficulty_level": s.difficulty_level,
             "episode_id": s.episode_id,
+            "anti_gaming_logs": getattr(s, "anti_gaming_logs", []),
         }
     def _build_observation(self) -> Observation:

viral_script_engine/rewards/r3_cultural_alignment.py ADDED Viewed

	@@ -0,0 +1,58 @@

+from dataclasses import dataclass
+from typing import List
+import json
+import re
@dataclass
class CulturalRewardResult:
    """Outcome of scoring one script against a region's cultural knowledge base."""

    score: float  # final reward, clipped to [0.0, 1.0]
    valid_refs_found: List[str]  # "valid_refs" entries present in the script
    correct_idioms_found: List[str]  # "correct_idioms" entries present in the script
    invalid_signals_found: List[str]  # "invalid_signals" entries present (count against the score)
    anachronistic_signals_found: List[str]  # "anachronistic_signals" entries present (count against the score)
    region: str  # region key the script was scored against


class CulturalAlignmentReward:
    """Score how well a script matches a region's entry in the cultural knowledge base.

    The knowledge base is a JSON object keyed by region. Each region entry may
    provide the lists ``valid_refs``, ``correct_idioms``, ``invalid_signals``
    and ``anachronistic_signals``; a list that is absent is treated as empty.
    """

    _CATEGORIES = (
        "valid_refs",
        "correct_idioms",
        "invalid_signals",
        "anachronistic_signals",
    )

    def __init__(self, knowledge_base_path: str = "data/cultural_kb.json"):
        """Load the knowledge base.

        Args:
            knowledge_base_path: Path to the JSON knowledge base. A relative
                path is tried against the current working directory first and
                then against the package root, so the default keeps working
                when the process is not started from the package directory.

        Raises:
            FileNotFoundError: If the file is found in neither location.
        """
        from pathlib import Path

        kb_path = Path(knowledge_base_path)
        if not kb_path.is_absolute() and not kb_path.exists():
            # This module lives in <package>/rewards/, while the default
            # knowledge base ships in <package>/data/.
            packaged = Path(__file__).resolve().parent.parent / kb_path
            if packaged.exists():
                kb_path = packaged
        with open(kb_path, "r", encoding="utf-8") as f:
            self._kb = json.load(f)

    def score(self, script: str, region: str) -> CulturalRewardResult:
        """Return the cultural-alignment reward for ``script`` in ``region``.

        The score is ``(valid refs + correct idioms - invalid signals -
        anachronistic signals) / (number of valid refs + idioms the region
        defines)``, clipped to ``[0, 1]``. Matching is a case-insensitive
        substring test. A region missing from the knowledge base yields a
        neutral 0.5 with empty match lists.
        """
        if region not in self._kb:
            return CulturalRewardResult(
                score=0.5,
                valid_refs_found=[],
                correct_idioms_found=[],
                invalid_signals_found=[],
                anachronistic_signals_found=[],
                region=region,
            )

        kb = self._kb[region]
        script_lower = script.lower()
        # Tolerate region entries that omit a category instead of raising KeyError.
        terms = {name: kb.get(name, []) for name in self._CATEGORIES}
        found = {
            name: [t for t in candidates if t.lower() in script_lower]
            for name, candidates in terms.items()
        }

        numerator = (
            len(found["valid_refs"])
            + len(found["correct_idioms"])
            - len(found["invalid_signals"])
            - len(found["anachronistic_signals"])
        )
        # max(..., 1) avoids dividing by zero for a region with no positive terms.
        denominator = max(len(terms["valid_refs"]) + len(terms["correct_idioms"]), 1)
        clipped_score = max(0.0, min(1.0, numerator / denominator))

        return CulturalRewardResult(
            score=clipped_score,
            valid_refs_found=found["valid_refs"],
            correct_idioms_found=found["correct_idioms"],
            invalid_signals_found=found["invalid_signals"],
            anachronistic_signals_found=found["anachronistic_signals"],
            region=region,
        )

viral_script_engine/rewards/r4_debate_resolution.py ADDED Viewed

	@@ -0,0 +1,82 @@

+from dataclasses import dataclass
+from viral_script_engine.agents.critic import CriticAgent, CritiqueClaim
+from viral_script_engine.environment.actions import ArbitratorAction
@dataclass
class DebateResolutionResult:
    """Verdict on whether a rewrite resolved the critique claim it targeted."""

    # 1.0 when resolved, 0.5 when partially resolved, 0.0 when the claim persists.
    score: float
    # One of "resolved", "partially_resolved" or "persists".
    resolution_status: str
    # Identifier and critique class of the claim the rewrite was aimed at.
    original_claim_id: str
    original_claim_class: str
    # Total number of claims the critic raised against the rewritten script.
    new_claims_count: int
+def _parse_seconds(ts: str) -> float:
+    if not ts or ts == "N/A":
+        return -1.0
+    try:
+        start = ts.split("-")[0].strip()
+        parts = start.split(":")
+        return float(parts[0]) * 60 + float(parts[1])
+    except Exception:
+        return -1.0
class DebateResolutionReward:
    """Reward based on whether the targeted critique still applies after a rewrite."""

    def __init__(self, critic_agent: CriticAgent):
        """Keep the critic that will be asked to re-critique rewritten scripts."""
        self.critic = critic_agent

    def score(
        self,
        new_script: str,
        original_action: ArbitratorAction,
        original_claim: CritiqueClaim,
        region: str,
        platform: str,
        niche: str,
    ) -> DebateResolutionResult:
        """Re-critique ``new_script`` and grade the fate of ``original_claim``.

        A fresh claim counts as the same issue when it has the same critique
        class and, if the original claim had a parseable timestamp, starts
        within 5 seconds of it. No such claim scores 1.0 ("resolved"); if the
        worst remaining one has severity "low" the score is 0.5
        ("partially_resolved"); otherwise 0.0 ("persists").
        """
        fresh_claims = self.critic.critique(new_script, region, platform, niche).claims
        target_start = _parse_seconds(original_claim.timestamp_range)
        target_class = original_claim.critique_class

        def _same_issue(claim) -> bool:
            if claim.critique_class != target_class:
                return False
            if target_start == -1.0:
                # No usable timestamp on the original: the class match alone decides.
                return True
            claim_start = _parse_seconds(claim.timestamp_range)
            return claim_start != -1.0 and abs(claim_start - target_start) <= 5

        survivors = [claim for claim in fresh_claims if _same_issue(claim)]

        if not survivors:
            verdict, status = 1.0, "resolved"
        else:
            severity_rank = {"high": 2, "medium": 1, "low": 0}
            # Unrecognised severities rank alongside "medium".
            worst = max(survivors, key=lambda c: severity_rank.get(c.severity, 1))
            if worst.severity == "low":
                verdict, status = 0.5, "partially_resolved"
            else:
                verdict, status = 0.0, "persists"

        return DebateResolutionResult(
            score=verdict,
            resolution_status=status,
            original_claim_id=original_claim.claim_id,
            original_claim_class=target_class,
            new_claims_count=len(fresh_claims),
        )

viral_script_engine/rewards/r5_defender_preservation.py ADDED Viewed

	@@ -0,0 +1,62 @@

+from dataclasses import dataclass
+from typing import List
+from viral_script_engine.agents.defender import DefenderOutput
@dataclass
class DefenderPreservationResult:
    """How well a rewritten script preserved the defender's quoted core strength."""

    # Reward value: 1.0, the raw similarity, or 0.0 depending on the similarity band.
    score: float
    # Highest cosine similarity between the quote and any sentence of the rewrite.
    max_similarity: float
    # The sentence of the rewrite that achieved max_similarity ("" if none).
    best_matching_sentence: str
+def _sentence_split(text: str) -> List[str]:
+    import re
+    sentences = re.split(r"(?<=[.!?])\s+", text.strip())
+    return [s for s in sentences if s.strip()]
class DefenderPreservationReward:
    """Reward the rewrite for keeping the sentence the defender singled out.

    The defender's ``core_strength_quote`` is embedded and compared, by cosine
    similarity, with every sentence of the rewritten script. The embedding
    model and an embedding cache are shared by all instances of the class.
    """

    _model = None
    _cache: dict = {}
    # Upper bound on cached embeddings. Every distinct sentence ever scored is
    # cached, so without a bound the cache grows for the life of the process.
    _CACHE_MAX_ENTRIES = 4096

    def _get_model(self):
        """Return the shared SentenceTransformer, loading it on first use."""
        if DefenderPreservationReward._model is None:
            # Imported lazily so importing this module does not pull in the dependency.
            from sentence_transformers import SentenceTransformer

            DefenderPreservationReward._model = SentenceTransformer("all-MiniLM-L6-v2")
        return DefenderPreservationReward._model

    def _embed(self, text: str):
        """Return the embedding tensor for ``text``, using the shared cache."""
        import hashlib

        cache = DefenderPreservationReward._cache
        key = hashlib.sha256(text.encode()).hexdigest()
        cached = cache.get(key)
        if cached is not None:
            return cached

        embedding = self._get_model().encode(text, convert_to_tensor=True)
        # dicts preserve insertion order, so the first key is the oldest entry.
        while cache and len(cache) >= self._CACHE_MAX_ENTRIES:
            cache.pop(next(iter(cache)))
        cache[key] = embedding
        return embedding

    def score(self, defender_output: DefenderOutput, rewritten_script: str) -> DefenderPreservationResult:
        """Score how well ``rewritten_script`` preserves the defender's quote.

        Returns a result whose ``score`` is 1.0 when the best sentence
        similarity is at least 0.85, the similarity itself when it is in
        ``[0.65, 0.85)``, and 0.0 below that or when the script has no
        sentences.
        """
        sentences = _sentence_split(rewritten_script)
        if not sentences:
            # Nothing to compare against; skip embedding the quote altogether.
            return DefenderPreservationResult(score=0.0, max_similarity=0.0, best_matching_sentence="")

        from sentence_transformers.util import cos_sim

        quote_emb = self._embed(defender_output.core_strength_quote)
        sims = [(float(cos_sim(quote_emb, self._embed(s))[0][0]), s) for s in sentences]
        max_sim, best_sent = max(sims, key=lambda pair: pair[0])

        if max_sim >= 0.85:
            final_score = 1.0
        elif max_sim >= 0.65:
            final_score = max_sim
        else:
            final_score = 0.0

        return DefenderPreservationResult(
            score=final_score,
            max_similarity=max_sim,
            best_matching_sentence=best_sent,
        )

viral_script_engine/rewards/reward_aggregator.py CHANGED Viewed

@@ -1,5 +1,7 @@
 import logging
-from typing import List
 from viral_script_engine.environment.actions import ActionType
 from viral_script_engine.environment.observations import RewardComponents
@@ -11,6 +13,19 @@ _COMPONENT_FIELDS = [
     "r4_debate_resolution", "r5_defender_preservation",
 ]
 class RewardAggregator:
     def compute(
@@ -18,20 +33,31 @@ class RewardAggregator:
         components: RewardComponents,
         episode_start_components: RewardComponents,
         action_history: List[ActionType],
-    ) -> RewardComponents:
         components.compute_total()
-        # Anti-gaming rule 1: catastrophic drop (>0.2 drop in any component)
         for field in _COMPONENT_FIELDS:
             curr = getattr(components, field)
             start = getattr(episode_start_components, field)
-            if curr is not None and start is not None and curr < start - 0.2:
                 logger.warning("Catastrophic drop in %s: %.3f -> %.3f", field, start, curr)
                 components.total = 0.0
                 components.anti_gaming_penalty = start - curr
-                return components
-        # Anti-gaming rule 2: action diversity (last 3 same ActionType)
         penalty = 0.0
         if len(action_history) >= 3 and len(set(action_history[-3:])) == 1:
             penalty = 0.15
@@ -39,4 +65,15 @@ class RewardAggregator:
         components.anti_gaming_penalty = penalty
         components.total = max(0.0, min(1.0, components.total - penalty))
-        return components

 import logging
+from typing import List, Optional, Tuple
+from pydantic import BaseModel
 from viral_script_engine.environment.actions import ActionType
 from viral_script_engine.environment.observations import RewardComponents
     "r4_debate_resolution", "r5_defender_preservation",
 ]
+_DROP_THRESHOLD = 0.25
class AntiGamingLog(BaseModel):
    # Record of the anti-gaming check applied to one environment step.

    # Episode and step the record belongs to.
    episode_id: str
    step_num: int
    # True when a penalty rule fired on this step.
    triggered: bool
    # "catastrophic_drop", "action_repetition", or None when nothing fired.
    rule_triggered: Optional[str] = None
    # Reward component that dropped; only set for "catastrophic_drop".
    component_that_dropped: Optional[str] = None
    # Size of the penalty, and the total reward before and after applying it.
    penalty_applied: float
    pre_penalty_total: float
    post_penalty_total: float
 class RewardAggregator:
     def compute(
         components: RewardComponents,
         episode_start_components: RewardComponents,
         action_history: List[ActionType],
+        episode_id: str = "",
+        step_num: int = 0,
+    ) -> Tuple[RewardComponents, AntiGamingLog]:
         components.compute_total()
+        pre_penalty_total = components.total
         for field in _COMPONENT_FIELDS:
             curr = getattr(components, field)
             start = getattr(episode_start_components, field)
+            if curr is not None and start is not None and curr < start - _DROP_THRESHOLD:
                 logger.warning("Catastrophic drop in %s: %.3f -> %.3f", field, start, curr)
                 components.total = 0.0
                 components.anti_gaming_penalty = start - curr
+                log = AntiGamingLog(
+                    episode_id=episode_id,
+                    step_num=step_num,
+                    triggered=True,
+                    rule_triggered="catastrophic_drop",
+                    component_that_dropped=field,
+                    penalty_applied=components.anti_gaming_penalty,
+                    pre_penalty_total=pre_penalty_total,
+                    post_penalty_total=0.0,
+                )
+                return components, log
         penalty = 0.0
         if len(action_history) >= 3 and len(set(action_history[-3:])) == 1:
             penalty = 0.15
         components.anti_gaming_penalty = penalty
         components.total = max(0.0, min(1.0, components.total - penalty))
+        log = AntiGamingLog(
+            episode_id=episode_id,
+            step_num=step_num,
+            triggered=penalty > 0,
+            rule_triggered="action_repetition" if penalty > 0 else None,
+            component_that_dropped=None,
+            penalty_applied=penalty,
+            pre_penalty_total=pre_penalty_total,
+            post_penalty_total=components.total,
+        )
+        return components, log

viral_script_engine/scripts/run_baseline.py ADDED Viewed

	@@ -0,0 +1,217 @@

+#!/usr/bin/env python3
+import json
+import sys
+from pathlib import Path
+import numpy as np
+from dotenv import load_dotenv
+from rich.console import Console
+from rich.table import Table
+from rich import box
+load_dotenv()
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+from viral_script_engine.agents.baseline_arbitrator import BaselineArbitratorAgent
+from viral_script_engine.environment.env import ViralScriptEnv
+console = Console()
+BASE_DIR = Path(__file__).parent.parent
+LOGS_DIR = BASE_DIR / "logs"
+LOGS_DIR.mkdir(exist_ok=True)
+_SCHEDULE = (
+    [(i, "easy") for i in range(1, 9)]
+    + [(i, "medium") for i in range(9, 17)]
+    + [(i, "hard") for i in range(17, 21)]
+)
+_REWARD_KEYS = ["r1_hook_strength", "r2_coherence", "r3_cultural_alignment",
+                "r4_debate_resolution", "r5_defender_preservation"]
def _make_env(difficulty: str) -> ViralScriptEnv:
    """Build a five-step environment for ``difficulty`` from the data files under BASE_DIR."""
    data_dir = BASE_DIR / "data"
    scripts_file = data_dir / "test_scripts" / "scripts.json"
    kb_file = data_dir / "cultural_kb.json"
    return ViralScriptEnv(
        scripts_path=str(scripts_file),
        cultural_kb_path=str(kb_file),
        max_steps=5,
        difficulty=difficulty,
    )
def run_episode(ep_num: int, difficulty: str, agent: BaselineArbitratorAgent) -> dict:
    """Run one episode with ``agent`` and return a JSON-serialisable record of it.

    Args:
        ep_num: Position of the episode in the schedule (used for reporting only).
        difficulty: Difficulty tier passed to the environment.
        agent: Arbitrator that picks an action from each observation.

    Returns:
        A dict with the episode identifiers, one entry per step holding the
        reward components and anti-gaming outcome, the original and final
        scripts, and ``total_reward`` -- the total reward reported by the last
        executed step (not a sum over steps).
    """
    env = _make_env(difficulty)
    obs, _ = env.reset()
    episode_id = obs["episode_id"]
    state = env.state()
    # Use the script id if the environment exposes one; otherwise keep the
    # "unknown" placeholder so the record shape does not change.
    script_id = state.get("script_id") or obs.get("script_id") or "unknown"
    original_script = state.get("original_script", "")

    steps_log = []
    total_reward = 0.0
    for _ in range(env.max_steps):
        action = agent.act(obs)
        obs, reward, terminated, truncated, info = env.step(action)
        rc = info["reward_components"]
        anti_log = info.get("anti_gaming_log", {})
        steps_log.append({
            "r1": rc.get("r1_hook_strength"),
            "r2": rc.get("r2_coherence"),
            "r3": rc.get("r3_cultural_alignment"),
            "r4": rc.get("r4_debate_resolution"),
            "r5": rc.get("r5_defender_preservation"),
            "total": reward,
            "anti_gaming_triggered": anti_log.get("triggered", False),
            "penalty": anti_log.get("penalty_applied", 0.0),
        })
        # Each step reports a full total, so the latest value replaces the previous one.
        total_reward = reward
        if terminated or truncated:
            break

    final_state = env.state()
    return {
        "episode_num": ep_num,
        "episode_id": episode_id,
        "difficulty": difficulty,
        "script_id": script_id,
        "steps": steps_log,
        "total_reward": total_reward,
        "anti_gaming_logs": final_state.get("anti_gaming_logs", []),
        "original_script": original_script,
        "final_script": final_state.get("current_script", ""),
    }
def main():
    """Run the scheduled baseline episodes, then save results, plots and a summary.

    A failing episode is recorded as a zero-reward placeholder with an
    ``"error"`` field so the remaining episodes still run. The final gate
    line reports FAIL when no episode completed, and a warning when only some
    did, rather than an unconditional PASS.
    """
    agent = BaselineArbitratorAgent()
    all_episodes = []
    scheduled = len(_SCHEDULE)

    for ep_num, difficulty in _SCHEDULE:
        console.print(f"[dim]Episode {ep_num:02d}/{scheduled} ({difficulty})...[/dim]")
        try:
            result = run_episode(ep_num, difficulty, agent)
            all_episodes.append(result)
            console.print(
                f"  -> total_reward={result['total_reward']:.3f}  "
                f"steps={len(result['steps'])}"
            )
        except Exception as e:
            # Top-level boundary: one bad episode must not abort the whole run.
            console.print(f"  [red]ERROR episode {ep_num}: {e}[/red]")
            all_episodes.append({
                "episode_num": ep_num,
                "episode_id": "",
                "difficulty": difficulty,
                "script_id": "error",
                "steps": [],
                "total_reward": 0.0,
                "anti_gaming_logs": [],
                "original_script": "",
                "final_script": "",
                "error": str(e),
            })

    results_path = LOGS_DIR / "baseline_results.json"
    with open(results_path, "w", encoding="utf-8") as f:
        json.dump(all_episodes, f, indent=2, default=str)

    _save_plots(all_episodes)
    _print_summary(all_episodes)

    failed = sum(1 for e in all_episodes if "error" in e)
    mean_total = (
        float(np.mean([e["total_reward"] for e in all_episodes])) if all_episodes else 0.0
    )

    if all_episodes and failed == len(all_episodes):
        console.print(
            f"\n[bold red]PHASE 2 GATE: FAIL — all {failed} episodes raised an error; "
            f"no baseline was measured.[/bold red]"
        )
        return

    console.print(
        f"\n[bold green]PHASE 2 GATE: PASS — Baseline curves saved. "
        f"Pre-training mean total reward: {mean_total:.2f}[/bold green]"
    )
    if failed:
        console.print(
            f"[yellow]Warning: {failed}/{len(all_episodes)} episodes raised an error and "
            f"count as 0.0 in the mean.[/yellow]"
        )
+def _collect_reward_series(episodes: list, key: str):
+    series = []
+    for ep in episodes:
+        vals = [s.get(key) for s in ep.get("steps", []) if s.get(key) is not None]
+        series.append(vals[-1] if vals else 0.0)
+    return series
def _save_plots(episodes: list):
    """Save a 2x3 grid of per-episode reward curves to ``baseline_reward_curves.png``.

    One panel is drawn for each of R1-R5 (final value per episode) and one for
    the total reward. Nothing is written when ``episodes`` is empty.
    """
    if not episodes:
        # min()/max() over the episode numbers below would raise on an empty run.
        console.print("[dim]No episodes to plot; skipping reward curves.[/dim]")
        return

    import matplotlib
    matplotlib.use("Agg")  # headless backend: render to file without a display
    import matplotlib.pyplot as plt

    labels = {
        "r1": "R1 Hook Strength",
        "r2": "R2 Coherence",
        "r3": "R3 Cultural Alignment",
        "r4": "R4 Debate Resolution",
        "r5": "R5 Defender Preservation",
        "total": "Total Reward",
    }
    ep_nums = [e["episode_num"] for e in episodes]
    x_low, x_high = min(ep_nums) - 0.5, max(ep_nums) + 0.5

    fig, axes = plt.subplots(2, 3, figsize=(14, 8), dpi=150)
    fig.suptitle(
        "Baseline (Untrained) Arbitrator — Pre-Training Reward Curves",
        fontsize=13,
    )
    for idx, (key, title) in enumerate(labels.items()):
        ax = axes[idx // 3][idx % 3]
        if key == "total":
            series = [e["total_reward"] for e in episodes]
        else:
            series = _collect_reward_series(episodes, key)
        ax.plot(ep_nums, series, marker="o", linewidth=1.5, markersize=4)
        ax.set_title(title, fontsize=10)
        ax.set_xlabel("Episode", fontsize=8)
        ax.set_ylabel("Reward", fontsize=8)
        ax.set_ylim(0, 1)
        ax.set_xlim(x_low, x_high)
        ax.tick_params(labelsize=7)
        ax.grid(True, alpha=0.3)

    fig.tight_layout()
    plot_path = LOGS_DIR / "baseline_reward_curves.png"
    fig.savefig(str(plot_path), dpi=150)
    # Close this figure explicitly rather than relying on pyplot's "current figure".
    plt.close(fig)
    console.print(f"[dim]Curves saved -> {plot_path}[/dim]")
def _print_summary(episodes: list):
    """Print a table of mean, std, min and max for each reward across ``episodes``.

    Component rows use each episode's final logged value; the total row uses
    each episode's ``total_reward``. Nothing is printed for an empty list.
    """
    if not episodes:
        # ndarray.min()/max() raise on empty input, so there is nothing to summarise.
        console.print("[dim]No episodes to summarise.[/dim]")
        return

    table = Table(
        title=f"Baseline Results — Mean +/- Std ({len(episodes)} episodes)",
        box=box.SIMPLE_HEAD,
    )
    table.add_column("Reward", style="cyan", min_width=28)
    for heading in ("Mean", "Std", "Min", "Max"):
        table.add_column(heading, min_width=8)

    label_map = {
        "r1": "R1 Hook Strength",
        "r2": "R2 Coherence",
        "r3": "R3 Cultural Alignment",
        "r4": "R4 Debate Resolution",
        "r5": "R5 Defender Preservation",
        "total": "Total Reward",
    }
    for key, label in label_map.items():
        if key == "total":
            vals = [e["total_reward"] for e in episodes]
        else:
            vals = _collect_reward_series(episodes, key)
        arr = np.array(vals, dtype=float)
        table.add_row(
            label,
            f"{arr.mean():.3f}",
            f"{arr.std():.3f}",
            f"{arr.min():.3f}",
            f"{arr.max():.3f}",
        )
    console.print(table)
+if __name__ == "__main__":
+    main()

viral_script_engine/tests/test_environment.py CHANGED Viewed

@@ -49,6 +49,10 @@ def env():
     with (
         patch("viral_script_engine.environment.env.CriticAgent") as mock_critic_cls,
         patch("viral_script_engine.environment.env.RewriterAgent") as mock_rewriter_cls,
     ):
         mock_critic = MagicMock()
         mock_critic.critique.return_value = make_mock_critique()
@@ -58,6 +62,39 @@ def env():
         mock_rewriter.rewrite.side_effect = make_mock_rewrite
         mock_rewriter_cls.return_value = mock_rewriter
         from viral_script_engine.environment.env import ViralScriptEnv
         yield ViralScriptEnv(scripts_path=SCRIPTS_PATH, max_steps=5, difficulty="easy")

     with (
         patch("viral_script_engine.environment.env.CriticAgent") as mock_critic_cls,
         patch("viral_script_engine.environment.env.RewriterAgent") as mock_rewriter_cls,
+        patch("viral_script_engine.environment.env.DefenderAgent") as mock_defender_cls,
+        patch("viral_script_engine.environment.env.CulturalAlignmentReward") as mock_r3_cls,
+        patch("viral_script_engine.environment.env.DebateResolutionReward") as mock_r4_cls,
+        patch("viral_script_engine.environment.env.DefenderPreservationReward") as mock_r5_cls,
     ):
         mock_critic = MagicMock()
         mock_critic.critique.return_value = make_mock_critique()
         mock_rewriter.rewrite.side_effect = make_mock_rewrite
         mock_rewriter_cls.return_value = mock_rewriter
+        mock_defender = MagicMock()
+        from viral_script_engine.agents.defender import DefenderOutput
+        mock_defender.defend.return_value = DefenderOutput(
+            core_strength="strong hook",
+            core_strength_quote="test quote",
+            defense_argument="preserve it",
+            flagged_critic_claims=[],
+            regional_voice_elements=[],
+        )
+        mock_defender_cls.return_value = mock_defender
+        mock_r3 = MagicMock()
+        mock_r3.score.return_value = MagicMock(score=0.6)
+        mock_r3_cls.return_value = mock_r3
+        mock_r4 = MagicMock()
+        from viral_script_engine.rewards.r4_debate_resolution import DebateResolutionResult
+        mock_r4.score.return_value = DebateResolutionResult(
+            score=0.8,
+            resolution_status="resolved",
+            original_claim_id="C1",
+            original_claim_class="hook_weakness",
+            new_claims_count=2,
+        )
+        mock_r4_cls.return_value = mock_r4
+        mock_r5 = MagicMock()
+        from viral_script_engine.rewards.r5_defender_preservation import DefenderPreservationResult
+        mock_r5.score.return_value = DefenderPreservationResult(
+            score=0.9, max_similarity=0.9, best_matching_sentence="test quote"
+        )
+        mock_r5_cls.return_value = mock_r5
         from viral_script_engine.environment.env import ViralScriptEnv
         yield ViralScriptEnv(scripts_path=SCRIPTS_PATH, max_steps=5, difficulty="easy")

viral_script_engine/tests/test_phase2.py ADDED Viewed

	@@ -0,0 +1,325 @@

+import json
+import pytest
+from unittest.mock import MagicMock, patch
+from viral_script_engine.agents.defender import DefenderAgent, DefenderOutput, DefenderParseError
+from viral_script_engine.agents.critic import CritiqueClaim
+from viral_script_engine.environment.actions import ActionType, ArbitratorAction
+from viral_script_engine.rewards.r3_cultural_alignment import CulturalAlignmentReward
+from viral_script_engine.rewards.r4_debate_resolution import DebateResolutionReward, DebateResolutionResult
+from viral_script_engine.rewards.r5_defender_preservation import DefenderPreservationReward
+from viral_script_engine.rewards.reward_aggregator import RewardAggregator, AntiGamingLog
+from viral_script_engine.environment.observations import RewardComponents
+# ─── fixtures ────────────────────────────────────────────────────────────────
+# Canned LLM reply (a JSON document) returned by the patched backend in the defender tests.
+MOCK_DEFENDER_RESPONSE = json.dumps(
+    dict(
+        core_strength="Relatable hook about saving money",
+        core_strength_quote="Let me tell you a secret",
+        defense_argument="This creates immediate viewer curiosity and should not be changed.",
+        flagged_critic_claims=["C2"],
+        regional_voice_elements=["yaar", "ek dum solid"],
+    )
+)
+# Two critic claims (a weak hook and a buried CTA) handed to the defender in the tests below.
+MOCK_CRITIQUE_CLAIMS = [
+    CritiqueClaim(**fields)
+    for fields in (
+        dict(
+            claim_id="C1",
+            critique_class="hook_weakness",
+            claim_text="Weak hook.",
+            timestamp_range="0:00-0:03",
+            evidence="Let me tell you a secret",
+            is_falsifiable=True,
+            severity="high",
+        ),
+        dict(
+            claim_id="C2",
+            critique_class="cta_buried",
+            claim_text="CTA at end.",
+            timestamp_range="0:45-0:50",
+            evidence="Like and save this video",
+            is_falsifiable=True,
+            severity="medium",
+        ),
+    )
+]
+@pytest.fixture
+def mock_defender_llm(monkeypatch):
+    """Patch LLMBackend.generate so every call returns MOCK_DEFENDER_RESPONSE."""
+    def _canned_generate(self, sys_prompt, usr_prompt, **kw):
+        return MOCK_DEFENDER_RESPONSE
+    monkeypatch.setattr(
+        "viral_script_engine.agents.llm_backend.LLMBackend.generate",
+        _canned_generate,
+    )
+# ─── Step 1: DefenderAgent ────────────────────────────────────────────────────
+def test_defender_parses_output(mock_defender_llm):
+    """defend() turns the canned JSON reply into a populated DefenderOutput."""
+    defend_kwargs = dict(
+        script="Let me tell you a secret about saving money. yaar, ek dum solid plan.",
+        critic_claims=MOCK_CRITIQUE_CLAIMS,
+        region="mumbai_gen_z",
+        platform="instagram",
+    )
+    output = DefenderAgent().defend(**defend_kwargs)
+    assert isinstance(output, DefenderOutput)
+    assert output.core_strength_quote == "Let me tell you a secret"
+    assert "C2" in output.flagged_critic_claims
+    assert "yaar" in output.regional_voice_elements
+def test_defender_retries_on_invalid_json(monkeypatch):
+    """A non-JSON first reply is followed by a second generate call whose reply parses."""
+    calls = []
+    def fake_generate(self, sys_prompt, usr_prompt, **kw):
+        # Only the very first call returns garbage; every later call is valid JSON.
+        calls.append(usr_prompt)
+        return "NOT JSON AT ALL" if len(calls) == 1 else MOCK_DEFENDER_RESPONSE
+    monkeypatch.setattr(
+        "viral_script_engine.agents.llm_backend.LLMBackend.generate",
+        fake_generate,
+    )
+    output = DefenderAgent().defend("script", MOCK_CRITIQUE_CLAIMS, "mumbai_gen_z", "instagram")
+    assert isinstance(output, DefenderOutput)
+    assert len(calls) == 2
+def test_defender_raises_after_two_failures(monkeypatch):
+    """defend() raises DefenderParseError when every LLM reply is unparseable."""
+    def _always_bad(self, sys_prompt, usr_prompt, **kw):
+        return "BAD JSON"
+    monkeypatch.setattr(
+        "viral_script_engine.agents.llm_backend.LLMBackend.generate",
+        _always_bad,
+    )
+    defender = DefenderAgent()
+    with pytest.raises(DefenderParseError):
+        defender.defend("script", MOCK_CRITIQUE_CLAIMS, "mumbai_gen_z", "instagram")
+# ─── Step 2: R3 CulturalAlignmentReward ──────────────────────────────────────
+@pytest.fixture
+def r3(tmp_path):
+    """Return a CulturalAlignmentReward backed by a two-region knowledge base written under tmp_path."""
+    mumbai_entry = {
+        "valid_refs": ["Bandra", "CSMT", "local train", "Swiggy", "IPL"],
+        "correct_idioms": ["ek dum solid", "kya scene hai", "full on"],
+        "invalid_signals": ["trunk call", "VHS", "walkman"],
+        "anachronistic_signals": [],
+    }
+    tier2_entry = {
+        "valid_refs": ["kirana store", "sabzi mandi", "jugaad", "panchayat", "mela"],
+        "correct_idioms": ["bilkul sahi", "arey bhai", "seedha baat"],
+        "invalid_signals": ["SaaS", "venture capital", "coworking space"],
+        "anachronistic_signals": [],
+    }
+    serialized = json.dumps({"mumbai_gen_z": mumbai_entry, "tier2_hindi_belt": tier2_entry})
+    kb_file = tmp_path / "test_kb.json"
+    kb_file.write_text(serialized, encoding="utf-8")
+    return CulturalAlignmentReward(knowledge_base_path=str(kb_file))
+def test_r3_scores_regional_script(r3):
+    """A script using Mumbai references scores above zero and reports at least one matched reference."""
+    outcome = r3.score(
+        "Take the local train to Bandra. IPL is on at night. ek dum solid plan yaar.",
+        "mumbai_gen_z",
+    )
+    assert outcome.score > 0.0
+    assert any(ref in outcome.valid_refs_found for ref in ("local train", "Bandra"))
+def test_r3_scores_non_regional_script_lower(r3):
+    """A generic script must not outscore a script that uses regional references."""
+    regional_text = "Take local train to Bandra. IPL is on. ek dum solid."
+    generic_text = "Buy on Amazon. Use your credit card. Free delivery available nationwide."
+    # The regional script is scored first, matching the original call order.
+    regional = r3.score(regional_text, "mumbai_gen_z")
+    generic = r3.score(generic_text, "mumbai_gen_z")
+    assert generic.score <= regional.score
+def test_r3_penalises_invalid_signals(r3):
+    """A script made of invalid signals scores exactly zero and lists the signals found."""
+    outcome = r3.score("This is like an old VHS walkman trunk call era.", "mumbai_gen_z")
+    assert outcome.score == 0.0
+    assert len(outcome.invalid_signals_found) >= 1
+def test_r3_neutral_for_unknown_region(r3):
+    """A region missing from the knowledge base yields a score of 0.5."""
+    unknown_region = "unknown_region_xyz"
+    assert r3.score("any script", unknown_region).score == 0.5
+def test_r3_tier2_valid(r3):
+    """A script using tier-2 Hindi-belt references and idioms scores above zero."""
+    outcome = r3.score(
+        "Went to kirana store, met at sabzi mandi, pure jugaad. bilkul sahi plan.",
+        "tier2_hindi_belt",
+    )
+    assert outcome.score > 0.0
+def test_r3_tier2_penalises_metro_jargon(r3):
+    """Metro startup jargon is reported as invalid signals for the tier-2 region."""
+    outcome = r3.score(
+        "We raised SaaS venture capital at a coworking space.",
+        "tier2_hindi_belt",
+    )
+    assert len(outcome.invalid_signals_found) >= 1
+# ─── Step 3: R4 DebateResolutionReward ───────────────────────────────────────
+def _make_critique_output(claims):
+    """Wrap claims in a CritiqueOutput with "medium" overall severity and an empty raw response."""
+    from viral_script_engine.agents.critic import CritiqueOutput
+    fields = {"claims": claims, "overall_severity": "medium", "raw_response": ""}
+    return CritiqueOutput(**fields)
+def _make_claim(claim_id, critique_class, timestamp_range, severity="high"):
+    """Build a falsifiable CritiqueClaim with placeholder claim text and evidence."""
+    placeholders = dict(
+        claim_text="test claim",
+        evidence="evidence text",
+        is_falsifiable=True,
+    )
+    return CritiqueClaim(
+        claim_id=claim_id,
+        critique_class=critique_class,
+        timestamp_range=timestamp_range,
+        severity=severity,
+        **placeholders,
+    )
+def _make_action():
+    """Return a hook-rewrite ArbitratorAction that targets claim "C1"."""
+    spec = {
+        "action_type": ActionType.HOOK_REWRITE,
+        "target_section": "hook",
+        "instruction": "fix hook",
+        "critique_claim_id": "C1",
+        "reasoning": "test",
+    }
+    return ArbitratorAction(**spec)
+def test_r4_resolved_when_no_matching_claim():
+    """If the re-critique only contains a different critique class, the claim counts as resolved."""
+    recritique = _make_critique_output([_make_claim("C1", "cta_buried", "0:45-0:50")])
+    critic = MagicMock()
+    critic.critique.return_value = recritique
+    reward = DebateResolutionReward(critic_agent=critic)
+    addressed = _make_claim("C1", "hook_weakness", "0:00-0:03", "high")
+    outcome = reward.score(
+        "new script", _make_action(), addressed, "mumbai_gen_z", "instagram", "finance"
+    )
+    assert outcome.score == 1.0
+    assert outcome.resolution_status == "resolved"
+def test_r4_partially_resolved_when_severity_drops():
+    """The same critique class returning at "low" instead of "high" severity scores 0.5."""
+    recritique = _make_critique_output([_make_claim("C1", "hook_weakness", "0:01-0:04", "low")])
+    critic = MagicMock()
+    critic.critique.return_value = recritique
+    reward = DebateResolutionReward(critic_agent=critic)
+    addressed = _make_claim("C1", "hook_weakness", "0:00-0:03", "high")
+    outcome = reward.score(
+        "new script", _make_action(), addressed, "mumbai_gen_z", "instagram", "finance"
+    )
+    assert outcome.score == 0.5
+    assert outcome.resolution_status == "partially_resolved"
+def test_r4_persists_when_same_severity():
+    """The same critique class returning at unchanged "high" severity scores 0.0."""
+    recritique = _make_critique_output([_make_claim("C1", "hook_weakness", "0:01-0:03", "high")])
+    critic = MagicMock()
+    critic.critique.return_value = recritique
+    reward = DebateResolutionReward(critic_agent=critic)
+    addressed = _make_claim("C1", "hook_weakness", "0:00-0:03", "high")
+    outcome = reward.score(
+        "new script", _make_action(), addressed, "mumbai_gen_z", "instagram", "finance"
+    )
+    assert outcome.score == 0.0
+    assert outcome.resolution_status == "persists"
+# ─── Step 4: R5 DefenderPreservationReward ───────────────────────────────────
+def _make_defender_output(quote: str) -> DefenderOutput:
+    """Build a DefenderOutput whose core_strength_quote is quote; the other fields are fixed placeholders."""
+    fixed_fields = {
+        "core_strength": "Strong opening",
+        "defense_argument": "Should be preserved.",
+        "flagged_critic_claims": [],
+        "regional_voice_elements": [],
+    }
+    return DefenderOutput(core_strength_quote=quote, **fixed_fields)
+def test_r5_high_score_when_quote_present():
+    """A rewrite that still contains the defender's quote verbatim scores at least 0.85."""
+    preserved = "Let me tell you a secret about saving money every month."
+    rewritten = "Let me tell you a secret about saving money every month. This is the key insight."
+    outcome = DefenderPreservationReward().score(_make_defender_output(preserved), rewritten)
+    assert outcome.score >= 0.85
+def test_r5_zero_score_when_quote_absent():
+    """A rewrite unrelated to the defender's quote scores below 0.65."""
+    missing = "Completely different text that shares nothing with rewrite."
+    rewritten = "Today we discuss quantum physics and neutron stars in distant galaxies."
+    outcome = DefenderPreservationReward().score(_make_defender_output(missing), rewritten)
+    assert outcome.score < 0.65
+# ─── Step 5/6: AntiGamingLog and RewardAggregator ────────────────────────────
+def _make_components(**kwargs) -> RewardComponents:
+    """Build RewardComponents from test defaults overridden by kwargs, then compute its total.
+    Defaults: r1, r2 and r3 are 0.7; r4 and r5 are None.
+    """
+    merged = {
+        "r1_hook_strength": 0.7,
+        "r2_coherence": 0.7,
+        "r3_cultural_alignment": 0.7,
+        "r4_debate_resolution": None,
+        "r5_defender_preservation": None,
+        **kwargs,
+    }
+    components = RewardComponents(**merged)
+    components.compute_total()
+    return components
+def test_anti_gaming_catastrophic_drop_zeroes_reward():
+    """A coherence drop from 0.8 to 0.4 zeroes the total and is logged as a catastrophic drop."""
+    baseline = _make_components(r2_coherence=0.8)
+    degraded = _make_components(r2_coherence=0.4)
+    outcome, gaming_log = RewardAggregator().compute(
+        degraded, baseline, [], episode_id="ep1", step_num=1
+    )
+    assert outcome.total == 0.0
+    assert gaming_log.triggered is True
+    assert gaming_log.rule_triggered == "catastrophic_drop"
+    assert gaming_log.component_that_dropped == "r2_coherence"
+    assert gaming_log.post_penalty_total == 0.0
+def test_anti_gaming_diversity_penalty_fires_on_3x_same():
+    """Three identical actions in the history trigger the action-repetition rule with a 0.15 penalty."""
+    baseline = _make_components()
+    unchanged = _make_components()
+    repeated = [ActionType.HOOK_REWRITE, ActionType.HOOK_REWRITE, ActionType.HOOK_REWRITE]
+    _, gaming_log = RewardAggregator().compute(
+        unchanged, baseline, repeated, episode_id="ep2", step_num=2
+    )
+    assert gaming_log.triggered is True
+    assert gaming_log.rule_triggered == "action_repetition"
+    assert gaming_log.penalty_applied == 0.15
+def test_anti_gaming_log_not_triggered_clean():
+    """Unchanged rewards with a varied action history produce an untriggered log and no penalty."""
+    baseline = _make_components()
+    unchanged = _make_components()
+    varied = [
+        ActionType.HOOK_REWRITE,
+        ActionType.CTA_PLACEMENT,
+        ActionType.SECTION_REORDER,
+    ]
+    _, gaming_log = RewardAggregator().compute(
+        unchanged, baseline, varied, episode_id="ep3", step_num=1
+    )
+    assert gaming_log.triggered is False
+    assert gaming_log.rule_triggered is None
+    assert gaming_log.penalty_applied == 0.0
+def test_anti_gaming_log_fields_populated():
+    """The returned log is an AntiGamingLog that carries the episode id and step number passed in."""
+    baseline = _make_components(r1_hook_strength=0.9)
+    degraded = _make_components(r1_hook_strength=0.5)
+    compute_result = RewardAggregator().compute(
+        degraded, baseline, [], episode_id="myep", step_num=3
+    )
+    _, gaming_log = compute_result
+    assert gaming_log.episode_id == "myep"
+    assert gaming_log.step_num == 3
+    assert isinstance(gaming_log, AntiGamingLog)

viral_script_engine/tests/test_rewards.py CHANGED Viewed

@@ -93,7 +93,7 @@ def test_aggregator_catastrophic_drop(aggregator):
     start = RewardComponents(r1_hook_strength=0.8, r2_coherence=0.7)
     start.compute_total()
     current = RewardComponents(r1_hook_strength=0.3, r2_coherence=0.7)
-    result = aggregator.compute(current, start, [ActionType.HOOK_REWRITE])
     assert result.total == 0.0
@@ -102,7 +102,7 @@ def test_aggregator_diversity_penalty(aggregator):
     start.compute_total()
     current = RewardComponents(r1_hook_strength=0.7, r2_coherence=0.7)
     history = [ActionType.HOOK_REWRITE, ActionType.HOOK_REWRITE, ActionType.HOOK_REWRITE]
-    result = aggregator.compute(current, start, history)
     assert result.anti_gaming_penalty == 0.15
     assert result.total < 0.7
@@ -112,6 +112,6 @@ def test_aggregator_no_penalty(aggregator):
     start.compute_total()
     current = RewardComponents(r1_hook_strength=0.7, r2_coherence=0.7)
     history = [ActionType.HOOK_REWRITE, ActionType.CTA_PLACEMENT, ActionType.SECTION_REORDER]
-    result = aggregator.compute(current, start, history)
     assert result.anti_gaming_penalty == 0.0
     assert result.total > 0

     start = RewardComponents(r1_hook_strength=0.8, r2_coherence=0.7)
     start.compute_total()
     current = RewardComponents(r1_hook_strength=0.3, r2_coherence=0.7)
+    result, log = aggregator.compute(current, start, [ActionType.HOOK_REWRITE])
     assert result.total == 0.0
     start.compute_total()
     current = RewardComponents(r1_hook_strength=0.7, r2_coherence=0.7)
     history = [ActionType.HOOK_REWRITE, ActionType.HOOK_REWRITE, ActionType.HOOK_REWRITE]
+    result, log = aggregator.compute(current, start, history)
     assert result.anti_gaming_penalty == 0.15
     assert result.total < 0.7
     start.compute_total()
     current = RewardComponents(r1_hook_strength=0.7, r2_coherence=0.7)
     history = [ActionType.HOOK_REWRITE, ActionType.CTA_PLACEMENT, ActionType.SECTION_REORDER]
+    result, log = aggregator.compute(current, start, history)
     assert result.anti_gaming_penalty == 0.0
     assert result.total > 0