Spaces:

vajeeda
/

MetaDebate

Sleeping

App Files Files Community

vajeeda committed on Apr 26

Commit

79cb04a

1 Parent(s): 09f7d63

feat(phase12): RetentionCurveSimulator, R10, 150-sample dataset, model trained, 14 tests PASS, gate PASS

Browse files

Files changed (20) hide show

demo/run_demo.py +107 -4
docs/progress.md +17 -0
scripts/run_dummy_episode.py +15 -6
scripts/train_retention_model.py +48 -0
session/context.md +11 -10
session/phase-log.md +1 -0
session/summary.md +22 -23
viral_script_engine/environment/env.py +19 -0
viral_script_engine/environment/observations.py +8 -5
viral_script_engine/retention/__init__.py +0 -0
viral_script_engine/retention/curve_predictor.py +152 -0
viral_script_engine/retention/curve_scorer.py +106 -0
viral_script_engine/retention/feature_extractor.py +190 -0
viral_script_engine/retention/model.joblib +3 -0
viral_script_engine/retention/training_data/__init__.py +0 -0
viral_script_engine/retention/training_data/build_dataset.py +158 -0
viral_script_engine/retention/training_data/retention_dataset.json +0 -0
viral_script_engine/rewards/r10_retention_curve.py +65 -0
viral_script_engine/rewards/reward_aggregator.py +1 -0
viral_script_engine/tests/test_phase12.py +325 -0

demo/run_demo.py CHANGED Viewed

@@ -283,7 +283,77 @@ def act4_arbitrator_decides(
     console.print()
-def act5_rewrite_and_reward(original_script: str, rewritten_script: str, reward_components: dict, baseline_total: float):
     console.print(Rule("[bold magenta]ACT 5 — THE REWRITE + REWARD[/bold magenta]", style="magenta"))
     diff_text = Text()
@@ -303,6 +373,30 @@ def act5_rewrite_and_reward(original_script: str, rewritten_script: str, reward_
     console.print()
     labels = {
         "r1_hook_strength": "R1 Hook Strength",
         "r2_coherence": "R2 Coherence",
@@ -310,6 +404,7 @@ def act5_rewrite_and_reward(original_script: str, rewritten_script: str, reward_
         "r4_debate_resolution": "R4 Resolution",
         "r5_defender_preservation": "R5 Preservation",
         "r9_platform_pacing": "R9 Platform Pacing",
     }
     table = Table(box=box.SIMPLE_HEAD, show_header=False, padding=(0, 1))
@@ -550,10 +645,14 @@ def run_compare(script_id: str):
         "total": (new_r1 + new_r2 + new_r3 + new_r5) / 4,
     }
-    act5_rewrite_and_reward(current_script, new_script, reward_components, baseline_total)
     console.print(Panel(
-        "[bold green]Demo complete.[/bold green] The Trained Arbitrator's richer reasoning produced "
         "a more targeted rewrite. Run [bold]python training/train_grpo.py[/bold] in Colab to "
         "train the Arbitrator with GRPO and see real improvement curves.",
         border_style="green",
@@ -647,7 +746,11 @@ def run_interactive():
             "total": (new_r1 + new_r2 + new_r3 + new_r5) / 4,
         }
-        act5_rewrite_and_reward(current_script, new_script, reward_components, baseline_total)
         current_script = new_script
         again = input("Continue to next step? [y/n]: ").strip().lower()

     console.print()
+def _retention_ascii_row(level_pct: int, values: list, timepoints: list) -> str:
+    """Render one horizontal row of the ASCII retention chart."""
+    threshold = level_pct / 100
+    bar = ""
+    for v in values:
+        bar += "██" if v >= threshold else "  "
+    label = f"{level_pct:4d}% |"
+    return f"{label}{bar}"
+def _render_retention_ascii(values: list, timepoints: list, label: str) -> str:
+    """Render a compact ASCII bar chart of a retention curve."""
+    rows = []
+    rows.append(f"  {label}")
+    for level in [100, 75, 50, 25]:
+        rows.append(_retention_ascii_row(level, values, timepoints))
+    # x-axis
+    axis = "       +" + "--" * len(timepoints)
+    tick_labels = "        " + " ".join(f"{t:<2}" for t in timepoints)
+    rows.append(axis)
+    rows.append(tick_labels + "s")
+    return "\n".join(rows)
+def _show_retention_curves(
+    orig_values: list,
+    new_values: list,
+    timepoints: list,
+    orig_auc: float,
+    new_auc: float,
+    orig_drop: int,
+    new_drop: int,
+) -> None:
+    """Render before/after retention curves as ASCII art in a panel."""
+    before_chart = _render_retention_ascii(orig_values, timepoints, "Before rewrite:")
+    after_chart = _render_retention_ascii(new_values, timepoints, "After rewrite:")
+    auc_delta = new_auc - orig_auc
+    auc_pct = (auc_delta / orig_auc * 100) if orig_auc > 0 else 0.0
+    sign = "+" if auc_delta >= 0 else ""
+    drop_line = (
+        f"Drop-off point: {orig_drop}s -> {new_drop}s"
+        if new_drop != orig_drop
+        else f"Drop-off point: {orig_drop}s (unchanged)"
+    )
+    body = (
+        f"{before_chart}\n\n"
+        f"{after_chart}\n\n"
+        f"Improvement: AUC {orig_auc:.2f} -> {new_auc:.2f} ({sign}{auc_pct:.0f}%)\n"
+        f"{drop_line}"
+    )
+    console.print(Panel(
+        body,
+        title="[cyan]PREDICTED RETENTION CURVE[/cyan]",
+        border_style="cyan",
+        padding=(1, 2),
+    ))
+    console.print()
+def act5_rewrite_and_reward(
+    original_script: str,
+    rewritten_script: str,
+    reward_components: dict,
+    baseline_total: float,
+    platform: str = "Reels",
+    region: str = "pan_india_english",
+    action_type: str = "hook_rewrite",
+):
     console.print(Rule("[bold magenta]ACT 5 — THE REWRITE + REWARD[/bold magenta]", style="magenta"))
     diff_text = Text()
     console.print()
+    # Phase 12: retention curve visualisation
+    try:
+        from viral_script_engine.retention.feature_extractor import FeatureExtractor
+        from viral_script_engine.retention.curve_predictor import RetentionCurvePredictor
+        from viral_script_engine.retention.curve_scorer import RetentionCurveScorer
+        extractor = FeatureExtractor()
+        predictor = RetentionCurvePredictor()
+        if predictor._trained:
+            orig_feat = extractor.extract(original_script, platform, region)
+            new_feat = extractor.extract(rewritten_script, platform, region)
+            orig_curve = predictor.predict(orig_feat)
+            new_curve = predictor.predict(new_feat)
+            _show_retention_curves(
+                orig_values=orig_curve.values,
+                new_values=new_curve.values,
+                timepoints=orig_curve.timepoints,
+                orig_auc=orig_curve.area_under_curve,
+                new_auc=new_curve.area_under_curve,
+                orig_drop=orig_curve.drop_off_point,
+                new_drop=new_curve.drop_off_point,
+            )
+    except Exception:
+        pass
     labels = {
         "r1_hook_strength": "R1 Hook Strength",
         "r2_coherence": "R2 Coherence",
         "r4_debate_resolution": "R4 Resolution",
         "r5_defender_preservation": "R5 Preservation",
         "r9_platform_pacing": "R9 Platform Pacing",
+        "r10_retention_curve": "R10 Retention Curve",
     }
     table = Table(box=box.SIMPLE_HEAD, show_header=False, padding=(0, 1))
         "total": (new_r1 + new_r2 + new_r3 + new_r5) / 4,
     }
+    act5_rewrite_and_reward(
+        current_script, new_script, reward_components, baseline_total,
+        platform=platform, region=region,
+        action_type=str(arb_action.action_type.value),
+    )
     console.print(Panel(
+        "[bold green]Demo complete.[/bold green] The Trained Arbitrator's richer reasoning produced "
         "a more targeted rewrite. Run [bold]python training/train_grpo.py[/bold] in Colab to "
         "train the Arbitrator with GRPO and see real improvement curves.",
         border_style="green",
             "total": (new_r1 + new_r2 + new_r3 + new_r5) / 4,
         }
+        act5_rewrite_and_reward(
+            current_script, new_script, reward_components, baseline_total,
+            platform=platform, region=region,
+            action_type=str(arb_action.action_type.value),
+        )
         current_script = new_script
         again = input("Continue to next step? [y/n]: ").strip().lower()

docs/progress.md CHANGED Viewed

@@ -152,6 +152,23 @@ Do not read entire codebase to understand progress — read this file.
 ✅ test_phase11.py — 24 tests, all passing
 ✅ Phase 11 gate — PHASE 11 GATE: PASS, 6 sessions completed, trend: plateauing
 ## Blocked Items
 ❌ GRPOConfig test — blocked by: pyarrow DLL blocked by Windows App Control (works on Linux/Colab)
 ❌ Full GRPO training — blocked by: no local GPU (requires Colab or cloud compute)

 ✅ test_phase11.py — 24 tests, all passing
 ✅ Phase 11 gate — PHASE 11 GATE: PASS, 6 sessions completed, trend: plateauing
+## Phase 12 — Retention Curve Simulator
+✅ ScriptFeatures + FeatureExtractor — 14 structural features extracted; platform one-hot; zero LLM calls
+✅ build_dataset.py + retention_dataset.json — 150 rule-based samples (50 each of high/medium/low); monotonic curve generation
+✅ RetentionCurvePredictor — MultiOutputRegressor(GBR); 10-point curve; train/predict; monotonic enforcement; avg MAE 0.031
+✅ RetentionCurve model — timepoints, values, AUC (trapezoidal), drop_off_point
+✅ retention/model.joblib — trained model saved
+✅ RetentionCurveScorer — ACTION_CURVE_MAP; overall+targeted+regression formula; CurveScorerResult
+✅ RetentionCurveReward (R10) — wraps extractor+predictor+scorer; episode-level original curve cache
+✅ observations.py — r10_retention_curve in RewardComponents; _WEIGHTS updated to 10-reward spec
+✅ reward_aggregator.py — r10_retention_curve in anti-gaming _COMPONENT_FIELDS
+✅ env.py — R10 wired in __init__() and step(); graceful skip if model not trained
+✅ scripts/train_retention_model.py — one-time training; builds dataset if missing; prints MAE
+✅ demo/run_demo.py — ASCII retention curve in Act 5; R10 row in reward table
+✅ scripts/run_dummy_episode.py — R10 gate assertion; Phase 12 GATE message
+✅ test_phase12.py — 14 tests, all passing
+✅ Phase 12 gate — PHASE 12 GATE: PASS, R10 firing
 ## Blocked Items
 ❌ GRPOConfig test — blocked by: pyarrow DLL blocked by Windows App Control (works on Linux/Colab)
 ❌ Full GRPO training — blocked by: no local GPU (requires Colab or cloud compute)

scripts/run_dummy_episode.py CHANGED Viewed

@@ -1,5 +1,5 @@
 """
-Gate check script for Phase 9 — runs a dummy episode and verifies R9 fires.
 Usage:
     python scripts/run_dummy_episode.py --difficulty easy --steps 3 --verbose
@@ -71,12 +71,14 @@ def run_episode(difficulty: str, steps: int, verbose: bool):
             if verbose:
                 r9 = rc.get("r9_platform_pacing")
                 r1 = rc.get("r1_hook_strength")
                 r2 = rc.get("r2_coherence")
                 r9_str = f"{r9:.3f}" if r9 is not None else "None"
                 print(
                     f"  Step {step + 1}: total={reward:.3f}  "
-                    f"R1={r1:.3f}  R2={r2:.3f}  R9={r9_str}"
                 )
             if terminated:
@@ -86,7 +88,7 @@ def run_episode(difficulty: str, steps: int, verbose: bool):
 def main():
-    parser = argparse.ArgumentParser(description="Phase 9 dummy episode gate check")
     parser.add_argument("--difficulty", default="easy", choices=["easy", "medium", "hard"])
     parser.add_argument("--steps", type=int, default=3)
     parser.add_argument("--verbose", action="store_true")
@@ -103,16 +105,23 @@ def main():
         elif not (0.0 <= rc["r9_platform_pacing"] <= 1.0):
             errors.append(f"Step {i+1}: r9_platform_pacing out of range: {rc['r9_platform_pacing']}")
     if errors:
         print("\n[GATE FAIL]")
         for e in errors:
             print(f"  ERROR: {e}")
         sys.exit(1)
     else:
         print(
-            f"\nPHASE 9 GATE: PASS — Platform-aware rewards active. "
-            f"R9 firing on platform={platform}. "
-            f"Cross-platform divergence confirmed."
         )

 """
+Gate check script for Phase 12 — runs a dummy episode and verifies R9 and R10 fire.
 Usage:
     python scripts/run_dummy_episode.py --difficulty easy --steps 3 --verbose
             if verbose:
                 r9 = rc.get("r9_platform_pacing")
+                r10 = rc.get("r10_retention_curve")
                 r1 = rc.get("r1_hook_strength")
                 r2 = rc.get("r2_coherence")
                 r9_str = f"{r9:.3f}" if r9 is not None else "None"
+                r10_str = f"{r10:.3f}" if r10 is not None else "None"
                 print(
                     f"  Step {step + 1}: total={reward:.3f}  "
+                    f"R1={r1:.3f}  R2={r2:.3f}  R9={r9_str}  R10={r10_str}"
                 )
             if terminated:
 def main():
+    parser = argparse.ArgumentParser(description="Phase 12 dummy episode gate check")
     parser.add_argument("--difficulty", default="easy", choices=["easy", "medium", "hard"])
     parser.add_argument("--steps", type=int, default=3)
     parser.add_argument("--verbose", action="store_true")
         elif not (0.0 <= rc["r9_platform_pacing"] <= 1.0):
             errors.append(f"Step {i+1}: r9_platform_pacing out of range: {rc['r9_platform_pacing']}")
+        if rc.get("r10_retention_curve") is None:
+            errors.append(f"Step {i+1}: r10_retention_curve is None — R10 not firing")
+        elif not (0.0 <= rc["r10_retention_curve"] <= 1.0):
+            errors.append(f"Step {i+1}: r10_retention_curve out of range: {rc['r10_retention_curve']}")
     if errors:
         print("\n[GATE FAIL]")
         for e in errors:
             print(f"  ERROR: {e}")
         sys.exit(1)
     else:
+        # Compute average AUC improvement for gate message
+        r10_scores = [rc.get("r10_retention_curve", 0.0) for rc in steps_data if rc.get("r10_retention_curve") is not None]
+        avg_r10 = sum(r10_scores) / len(r10_scores) if r10_scores else 0.0
         print(
+            f"\nPHASE 12 GATE: PASS — Retention curve predictor active. "
+            f"R10 firing. AUC improvement: +{avg_r10:.2f}."
         )

scripts/train_retention_model.py ADDED Viewed

	@@ -0,0 +1,48 @@

+"""
+One-time training script for the RetentionCurvePredictor.
+Usage:
+    python scripts/train_retention_model.py
+Steps:
+    1. Builds retention_dataset.json if it doesn't exist
+    2. Trains the RetentionCurvePredictor
+    3. Prints train/val MAE per timepoint
+    4. Saves model to viral_script_engine/retention/model.joblib
+"""
+import sys
+from pathlib import Path
+sys.path.insert(0, str(Path(__file__).parent.parent))
+from viral_script_engine.retention.training_data.build_dataset import build, _OUTPUT_PATH
+from viral_script_engine.retention.curve_predictor import RetentionCurvePredictor
+_CULTURAL_KB_PATH = str(
+    Path(__file__).parent.parent / "viral_script_engine" / "data" / "cultural_kb.json"
+)
+def main():
+    # Step 1: build dataset if missing
+    if not _OUTPUT_PATH.exists():
+        print("Building retention dataset...")
+        out = build()
+        print(f"  Dataset created: {out}")
+    else:
+        print(f"Dataset already exists: {_OUTPUT_PATH}")
+    # Step 2: train the predictor
+    print("\nTraining RetentionCurvePredictor...")
+    predictor = RetentionCurvePredictor()
+    result = predictor.train(
+        dataset_path=str(_OUTPUT_PATH),
+        cultural_kb_path=_CULTURAL_KB_PATH,
+    )
+    avg_mae = result["avg_mae"]
+    print(f"\nRetention model trained. Avg MAE: {avg_mae:.4f}. Model saved.")
+if __name__ == "__main__":
+    main()

session/context.md CHANGED Viewed

@@ -1,21 +1,21 @@
 # Context — Carry Over for Next Session
 ## Current Phase
-Phase: 10
-Prompt file: prompts/phase-10.md
 Status: complete
 ---
 ## Currently Working On
-Feature: Phase 10 complete. Awaiting user confirmation to proceed to next phase (if any).
 File(s): N/A
-Status: All 25 tests pass. Gate script prints PHASE 10 GATE: PASS.
 ---
 ## Open Questions
-Is there a Phase 11? Check if prompts/phase-11.md exists.
 ---
@@ -27,15 +27,16 @@ Full GRPO training requires Colab or cloud GPU
 ---
 ## Last Commit Message
-feat(phase10): ABScriptEnv, ContrastiveReward, A/B rollout, 25 tests PASS, gate PASS
 ---
 ## Do Not Forget
-ABScriptEnv.reset() runs forced step 1 automatically — step 2+ are free choice
-Contrastive reward formula: base_reward + tanh(delta*3)*0.2, clipped [0,1]
-Cumulative reward is sum of per-step totals — clips to 1.0 with 4+ steps at high score
-Gate check: python scripts/run_ab_episode.py --script S08 --steps 4 --verbose
 ---

 # Context — Carry Over for Next Session
 ## Current Phase
+Phase: 12
+Prompt file: prompts/phase-12.md
 Status: complete
 ---
 ## Currently Working On
+Feature: Phase 12 complete. Awaiting user confirmation to proceed to next phase (if any).
 File(s): N/A
+Status: All 14 tests pass. Gate script prints PHASE 12 GATE: PASS.
 ---
 ## Open Questions
+Is there a Phase 13? Check if prompts/phase-13.md exists.
 ---
 ---
 ## Last Commit Message
+feat(phase12): RetentionCurveSimulator, R10, 150-sample dataset, model trained, 14 tests PASS, gate PASS
 ---
 ## Do Not Forget
+R10 requires trained model — run python scripts/train_retention_model.py first
+RetentionCurvePredictor model saved at viral_script_engine/retention/model.joblib
+MODEL_PATH is Path(__file__).parent / "model.joblib" (relative to curve_predictor.py)
+R10 gracefully skips (score=None) in env.step() if model not trained
+Gate check: python scripts/run_dummy_episode.py --difficulty easy --steps 3 --verbose
 ---

session/phase-log.md CHANGED Viewed

@@ -30,6 +30,7 @@ ROLLED BACK — changes reverted, reason in line
 [2026-04-26] [Phase 9] COMPLETE — PlatformRegistry, R9 PlatformPacing, R1/R2 platform-aware, 20 tests PASS, gate PASS
 [2026-04-26] [Phase 10] COMPLETE — ABScriptEnv, ContrastiveReward, A/B rollout fn, 25 tests PASS, gate PASS
 [2026-04-26] [Phase 11] COMPLETE — CreatorHistoryBuffer, MemoryCompressor, HistoryStore, 24 tests PASS, gate PASS
 ---

 [2026-04-26] [Phase 9] COMPLETE — PlatformRegistry, R9 PlatformPacing, R1/R2 platform-aware, 20 tests PASS, gate PASS
 [2026-04-26] [Phase 10] COMPLETE — ABScriptEnv, ContrastiveReward, A/B rollout fn, 25 tests PASS, gate PASS
 [2026-04-26] [Phase 11] COMPLETE — CreatorHistoryBuffer, MemoryCompressor, HistoryStore, 24 tests PASS, gate PASS
+[2026-04-26] [Phase 12] COMPLETE — RetentionCurveSimulator, R10, 150-sample dataset, model MAE 0.031, 14 tests PASS, gate PASS
 ---

session/summary.md CHANGED Viewed

@@ -13,40 +13,39 @@ One session = one summary. Previous summaries live in phase-log.md.
 2026-04-26
 ### Phase
-Phase 9 — Multi-Platform Reward Divergence
 ### What Was Done
-- Created platforms/__init__.py, platform_kb.json, platform_spec.py — PlatformRegistry single source of truth for all 4 platforms
-- Updated rewards/r1_hook_strength.py — platform-aware hook scoring via PlatformRegistry; new length_fit check (6th check, 15% weight)
-- Updated rewards/r2_coherence.py — platform length penalty (max 0.3 cap) applied after semantic similarity score
-- Created rewards/r9_platform_pacing.py — PlatformPacingReward; 3 checks: pacing (40%), section ratio (40%), CTA position (20%)
-- Updated environment/observations.py — r9_platform_pacing in RewardComponents; updated _WEIGHTS to 9-reward spec
-- Updated rewards/reward_aggregator.py — r9_platform_pacing added to anti-gaming _COMPONENT_FIELDS
-- Updated environment/env.py — R9 wired in step(); _current_platform stored on reset(); platform passed to R1/R2
-- Updated curriculum JSONL files — added Feed entries: easy (+2), medium (+3), hard (+4 cross-platform)
-- Updated demo/run_demo.py — Act 1 shows platform spec (hook window, max length, pacing); Act 5 shows R9 row
-- Created tests/test_phase9.py — 20 tests, all passing
-- Created scripts/run_dummy_episode.py — LLM-stubbed gate check; Phase 9 GATE: PASS
-- Created scripts/run_platform_comparison.py — S03 scored on Reels/Shorts/Feed; all 3 rewards diverge; GATE: PASS
 ### What Was NOT Done (carry over)
 - Real GRPO training — requires GPU (Colab)
 ### Errors Encountered
-- test_short_hook_passes_length_fit_on_reels: hook was ~18 words (exceeded Reels 15-word limit) — fixed test script
-- test_penalty_capped_at_0_3: compared semantically different scripts (base sim=0) — fixed to use same-vocab scripts
-- test_same_script_scores_differently_on_reels_vs_feed: _SLOW_SCRIPT both pacing+ratio zeroed out on both platforms — switched to sub-score comparison
-- test_env_r9_fires_in_step: defender.defend() not patched → API call — patched defender with full MagicMock
-- run_dummy_episode.py: R5 needs core_strength_quote from defender mock — added all required fields
-- run_platform_comparison.py: Unicode bar chars fail on Windows cp1252 — switched to ASCII #/.
 ### Tests Status
-Phase 9: 20 passed
-Gate check (dummy episode): PASS
-Gate check (platform comparison S03): PASS — R1/R2/R9 all diverge across Reels/Shorts/Feed
 ### Commit Messages Generated
-feat(phase9): platform reward divergence — PlatformRegistry, R9 PlatformPacing, R1/R2 platform-aware, 20 tests PASS, gate PASS
 ---

 2026-04-26
 ### Phase
+Phase 12 — Retention Curve Simulator
 ### What Was Done
+- Created viral_script_engine/retention/__init__.py — package init
+- Created viral_script_engine/retention/feature_extractor.py — ScriptFeatures pydantic model (14 features + platform one-hot); FeatureExtractor.extract() — zero LLM calls, structural analysis
+- Created viral_script_engine/retention/training_data/__init__.py — package init
+- Created viral_script_engine/retention/training_data/build_dataset.py — 150 rule-based samples (50 each of high/medium/low); monotonic curve generation from R1/R2/R3 scores
+- Created viral_script_engine/retention/training_data/retention_dataset.json — 150 samples generated
+- Created viral_script_engine/retention/curve_predictor.py — RetentionCurvePredictor (MultiOutputRegressor+GBR); RetentionCurve model with AUC + drop-off; train/predict; monotonic enforcement
+- Created viral_script_engine/retention/model.joblib — trained model, avg MAE 0.031
+- Created viral_script_engine/retention/curve_scorer.py — RetentionCurveScorer; ACTION_CURVE_MAP; overall+targeted+regression formula
+- Created viral_script_engine/rewards/r10_retention_curve.py — RetentionCurveReward; episode-level original curve caching
+- Updated viral_script_engine/environment/observations.py — r10_retention_curve field; updated _WEIGHTS to 10-reward spec
+- Updated viral_script_engine/rewards/reward_aggregator.py — r10_retention_curve in anti-gaming _COMPONENT_FIELDS
+- Updated viral_script_engine/environment/env.py — R10 wired in __init__() and step(); graceful skip if model not trained
+- Created scripts/train_retention_model.py — one-time training script; builds dataset if missing; prints MAE
+- Updated demo/run_demo.py — _render_retention_ascii(); _show_retention_curves() ASCII panel in Act 5; R10 row in reward table
+- Updated scripts/run_dummy_episode.py — R10 check in gate assertions; Phase 12 GATE message
+- Created viral_script_engine/tests/test_phase12.py — 14 tests, all passing
+- Phase 12 gate: PASS
 ### What Was NOT Done (carry over)
 - Real GRPO training — requires GPU (Colab)
 ### Errors Encountered
+- None; all 14 tests passed on first run
 ### Tests Status
+Phase 12: 14 passed
+Gate check: PHASE 12 GATE: PASS — Retention curve predictor active. R10 firing.
 ### Commit Messages Generated
+feat(phase12): RetentionCurveSimulator, R10, 150-sample dataset, model trained, 14 tests PASS, gate PASS
 ---

viral_script_engine/environment/env.py CHANGED Viewed

@@ -30,6 +30,7 @@ from viral_script_engine.rewards.r9_platform_pacing import PlatformPacingReward
 from viral_script_engine.platforms.platform_spec import PlatformRegistry
 from viral_script_engine.memory.memory_compressor import MemoryCompressor
 from viral_script_engine.memory.history_store import HistoryStore
 _TIERS = {
     "easy": ["S01", "S02", "S03", "S04"],
@@ -85,6 +86,7 @@ class ViralScriptEnv:
         self.platform_registry = PlatformRegistry()
         self.memory_compressor = MemoryCompressor()
         self.history_store = HistoryStore()
         self._state: Optional[EpisodeState] = None
         self._current_profile: Optional[CreatorProfile] = None
         self._current_platform: str = "Reels"
@@ -282,6 +284,22 @@ class ViralScriptEnv:
         # Phase 9: compute R9 platform pacing
         r9_result = self.r9.score(new_script, platform=self._current_platform)
         components = RewardComponents(
             r1_hook_strength=r1_result.score,
             r2_coherence=r2_result.score,
@@ -292,6 +310,7 @@ class ViralScriptEnv:
             r7_originality=r7_result.score,
             r8_persona_fit=r8_score,
             r9_platform_pacing=r9_result.score,
             process_reward=process_result.weighted_contribution if process_result else None,
         )

 from viral_script_engine.platforms.platform_spec import PlatformRegistry
 from viral_script_engine.memory.memory_compressor import MemoryCompressor
 from viral_script_engine.memory.history_store import HistoryStore
+from viral_script_engine.rewards.r10_retention_curve import RetentionCurveReward
 _TIERS = {
     "easy": ["S01", "S02", "S03", "S04"],
         self.platform_registry = PlatformRegistry()
         self.memory_compressor = MemoryCompressor()
         self.history_store = HistoryStore()
+        self.r10 = RetentionCurveReward(cultural_kb_path=cultural_kb_path)
         self._state: Optional[EpisodeState] = None
         self._current_profile: Optional[CreatorProfile] = None
         self._current_platform: str = "Reels"
         # Phase 9: compute R9 platform pacing
         r9_result = self.r9.score(new_script, platform=self._current_platform)
+        # Phase 12: compute R10 retention curve reward
+        r10_score = None
+        if self.r10.predictor._trained:
+            try:
+                r10_result = self.r10.score(
+                    original_script=self._state.original_script,
+                    rewritten_script=new_script,
+                    platform=self._current_platform,
+                    region=self._state.region,
+                    action_type=str(arb_action.action_type.value),
+                    episode_id=self._state.episode_id,
+                )
+                r10_score = r10_result.score
+            except Exception:
+                r10_score = None
         components = RewardComponents(
             r1_hook_strength=r1_result.score,
             r2_coherence=r2_result.score,
             r7_originality=r7_result.score,
             r8_persona_fit=r8_score,
             r9_platform_pacing=r9_result.score,
+            r10_retention_curve=r10_score,
             process_reward=process_result.weighted_contribution if process_result else None,
         )

viral_script_engine/environment/observations.py CHANGED Viewed

@@ -6,9 +6,10 @@ from viral_script_engine.agents.critic import CritiqueClaim
 from viral_script_engine.environment.actions import ArbitratorAction
 _WEIGHTS: Dict[str, float] = {
-    "r1": 0.15, "r2": 0.12, "r3": 0.10,
-    "r4": 0.10, "r5": 0.10, "r6": 0.08,
-    "r7": 0.08, "r8": 0.08, "r9": 0.09,
 }
@@ -21,8 +22,9 @@ class RewardComponents(BaseModel):
     r6_safety: Optional[float] = None
     r7_originality: Optional[float] = None
     r8_persona_fit: Optional[float] = None   # Phase 8: creator persona fit
-    r9_platform_pacing: Optional[float] = None  # Phase 9: platform pacing fit
-    process_reward: Optional[float] = None   # fired before rewrite (Phase 7)
     anti_gaming_penalty: float = 0.0
     total: float = 0.0
@@ -37,6 +39,7 @@ class RewardComponents(BaseModel):
             "r7": self.r7_originality,
             "r8": self.r8_persona_fit,
             "r9": self.r9_platform_pacing,
         }
         active = {k: v for k, v in vals.items() if v is not None}
         if not active:

 from viral_script_engine.environment.actions import ArbitratorAction
 _WEIGHTS: Dict[str, float] = {
+    "r1": 0.12, "r2": 0.10, "r3": 0.10,
+    "r4": 0.10, "r5": 0.08, "r6": 0.07,
+    "r7": 0.07, "r8": 0.08, "r9": 0.08,
+    "r10": 0.10,
 }
     r6_safety: Optional[float] = None
     r7_originality: Optional[float] = None
     r8_persona_fit: Optional[float] = None   # Phase 8: creator persona fit
+    r9_platform_pacing: Optional[float] = None   # Phase 9: platform pacing fit
+    r10_retention_curve: Optional[float] = None  # Phase 12: retention curve reward
+    process_reward: Optional[float] = None        # fired before rewrite (Phase 7)
     anti_gaming_penalty: float = 0.0
     total: float = 0.0
             "r7": self.r7_originality,
             "r8": self.r8_persona_fit,
             "r9": self.r9_platform_pacing,
+            "r10": self.r10_retention_curve,
         }
         active = {k: v for k, v in vals.items() if v is not None}
         if not active:

viral_script_engine/retention/__init__.py ADDED Viewed

File without changes

viral_script_engine/retention/curve_predictor.py ADDED Viewed

	@@ -0,0 +1,152 @@

+import json
+from pathlib import Path
+from typing import List, Optional
+import numpy as np
+from pydantic import BaseModel
+from sklearn.ensemble import GradientBoostingRegressor
+from sklearn.multioutput import MultiOutputRegressor
+import joblib
+from viral_script_engine.retention.feature_extractor import FeatureExtractor, ScriptFeatures
# The trained model artifact is stored alongside this module.
_MODEL_PATH = Path(__file__).with_name("model.joblib")
# Seconds into the video at which retention is sampled (10 points per curve).
CURVE_TIMEPOINTS = [0, 3, 6, 10, 15, 20, 25, 30, 45, 60]
class RetentionCurve(BaseModel):
    """A retention curve sampled at ``CURVE_TIMEPOINTS`` plus summary statistics."""

    timepoints: List[int]
    values: List[float]
    # Trapezoidal area under the curve divided by the covered duration.
    area_under_curve: float
    drop_off_point: int  # first timepoint where retention drops below 0.5

    @classmethod
    def from_values(cls, values: List[float]) -> "RetentionCurve":
        """Build a curve from one retention value per entry of ``CURVE_TIMEPOINTS``.

        Args:
            values: Retention fractions, one per timepoint, in timepoint order.

        Returns:
            A ``RetentionCurve`` with values and AUC rounded to 4 decimals.
            ``drop_off_point`` is the last timepoint when retention never
            falls below 0.5.

        Raises:
            ValueError: If ``values`` does not have exactly one entry per
                timepoint (previously this surfaced as a bare IndexError or
                silently produced an AUC that ignored part of the input).
        """
        tps = CURVE_TIMEPOINTS
        if len(values) != len(tps):
            raise ValueError(
                f"expected {len(tps)} retention values, got {len(values)}"
            )

        # Trapezoidal AUC, normalised to [0, 1] by the total covered duration.
        auc = 0.0
        for i in range(len(tps) - 1):
            dt = tps[i + 1] - tps[i]
            auc += dt * (values[i] + values[i + 1]) / 2
        total_duration = tps[-1] - tps[0]
        auc = auc / total_duration if total_duration > 0 else 0.0

        # First timepoint below the 0.5 threshold; falls back to the final one.
        drop_off = next((t for t, v in zip(tps, values) if v < 0.5), tps[-1])

        return cls(
            timepoints=list(tps),
            values=[round(v, 4) for v in values],
            area_under_curve=round(auc, 4),
            drop_off_point=drop_off,
        )
class RetentionCurvePredictor:
    """
    Predicts a 10-point retention curve from script features.
    10 points = retention at seconds [0, 3, 6, 10, 15, 20, 25, 30, 45, 60].
    Uses MultiOutputRegressor(GradientBoostingRegressor).
    Lightweight enough to run on CPU without GPU (<1ms per call after training).
    """

    MODEL_PATH = _MODEL_PATH
    CURVE_TIMEPOINTS = CURVE_TIMEPOINTS

    def __init__(self):
        """Load the persisted model if it is usable, else start untrained.

        A model file that exists but cannot be deserialised (for example a
        git-lfs pointer that was never fetched, a truncated file, or a
        library version mismatch) no longer aborts construction: a warning
        is emitted and the predictor falls back to the untrained state, which
        callers can detect through ``_trained``.
        """
        self.model = None
        self._trained = False
        if _MODEL_PATH.exists():
            # NOTE(security): joblib.load unpickles the file; only load model
            # artifacts that ship with the repository or that you produced.
            try:
                self.model = joblib.load(_MODEL_PATH)
                self._trained = True
            except Exception as exc:  # boundary: any load failure => untrained
                import warnings

                warnings.warn(
                    f"Could not load retention model from {_MODEL_PATH}: {exc!r}; "
                    "falling back to an untrained model.",
                    RuntimeWarning,
                    stacklevel=2,
                )
        if not self._trained:
            self.model = MultiOutputRegressor(
                GradientBoostingRegressor(n_estimators=100, max_depth=4, random_state=42)
            )

    def train(
        self,
        dataset_path: Optional[str] = None,
        cultural_kb_path: Optional[str] = None,
    ) -> dict:
        """
        Train on retention_dataset.json. Saves model to MODEL_PATH.

        Args:
            dataset_path: JSON file with a top-level ``"samples"`` list; defaults
                to ``training_data/retention_dataset.json`` next to this module.
            cultural_kb_path: Forwarded to ``FeatureExtractor``.

        Returns:
            dict with ``avg_mae`` and ``mae_per_timepoint`` measured on the
            held-out split. When no sample could be held out (a single usable
            sample) ``avg_mae`` is NaN and ``mae_per_timepoint`` is empty.

        Raises:
            RuntimeError: If no usable training sample could be extracted.
        """
        if dataset_path is None:
            dataset_path = str(
                Path(__file__).parent / "training_data" / "retention_dataset.json"
            )
        with open(dataset_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        extractor = FeatureExtractor(cultural_kb_path=cultural_kb_path)
        expected_len = len(CURVE_TIMEPOINTS)
        X: List[List[float]] = []
        y: List[List[float]] = []
        skipped = 0
        for sample in data["samples"]:
            try:
                features = extractor.extract(
                    sample["script_text"], sample["platform"], sample["region"]
                )
                vec = features.to_vector()
                curve = [float(v) for v in sample["retention_curve"]]
            except Exception:
                # Best-effort: a malformed sample is counted and skipped
                # rather than aborting the whole training run.
                skipped += 1
                continue
            # Reject NaN features and curves that do not match the timepoint
            # grid (a ragged target matrix would otherwise fail in np.array).
            if any(v != v for v in vec) or len(curve) != expected_len:
                skipped += 1
                continue
            X.append(vec)
            y.append(curve)

        if not X:
            raise RuntimeError("No valid training samples extracted.")

        X_arr = np.array(X, dtype=float)
        y_arr = np.array(y, dtype=float)
        n = len(X_arr)
        rng = np.random.RandomState(42)  # fixed seed => reproducible split
        idx = rng.permutation(n)
        split = max(1, int(n * 0.8))
        X_train, X_val = X_arr[idx[:split]], X_arr[idx[split:]]
        y_train, y_val = y_arr[idx[:split]], y_arr[idx[split:]]

        self.model.fit(X_train, y_train)
        self._trained = True

        print(f"  Trained on {len(X_train)} samples, validated on {len(X_val)} (skipped {skipped})")
        if len(X_val):
            val_preds = np.clip(self.model.predict(X_val), 0.0, 1.0)
            mae_per_tp = np.mean(np.abs(val_preds - y_val), axis=0).tolist()
            avg_mae = float(np.mean(mae_per_tp))
            print("  Train/Val MAE per timepoint:")
            for t, mae in zip(CURVE_TIMEPOINTS, mae_per_tp):
                print(f"    {t:2d}s: {mae:.4f}")
            print(f"  Avg MAE: {avg_mae:.4f}")
        else:
            # Nothing was held out, so predicting on an empty matrix would
            # raise before the freshly fitted model is saved.
            mae_per_tp = []
            avg_mae = float("nan")
            print("  No validation samples held out; MAE not computed")

        joblib.dump(self.model, _MODEL_PATH)
        print(f"  Model saved to {_MODEL_PATH}")
        return {"avg_mae": avg_mae, "mae_per_timepoint": mae_per_tp}

    def predict(self, features: ScriptFeatures) -> RetentionCurve:
        """Predict a clipped, non-increasing retention curve for ``features``.

        Raises:
            RuntimeError: If no trained model is available.
        """
        if not self._trained:
            raise RuntimeError("Model not trained. Run train() first.")
        vec = np.array(features.to_vector(), dtype=float).reshape(1, -1)
        raw = self.model.predict(vec)[0]
        clipped = np.clip(raw, 0.0, 1.0)
        values = self._enforce_monotonic_decrease(clipped).tolist()
        return RetentionCurve.from_values(values)

    @staticmethod
    def _enforce_monotonic_decrease(values: np.ndarray) -> np.ndarray:
        """Return a copy where each entry is capped by every earlier entry."""
        # Running minimum == "never rise above any previous point".
        return np.minimum.accumulate(values)

viral_script_engine/retention/curve_scorer.py ADDED Viewed

	@@ -0,0 +1,106 @@

+from typing import List
+from pydantic import BaseModel
+from viral_script_engine.retention.curve_predictor import CURVE_TIMEPOINTS, RetentionCurve
# Lookup from a timepoint (seconds) to its position within a curve's values.
_TP_INDEX = dict(zip(CURVE_TIMEPOINTS, range(len(CURVE_TIMEPOINTS))))
class CurveScorerResult(BaseModel):
    """Outcome of comparing a rewritten script's curve with the original's."""

    # Weighted, clipped reward in [0, 1].
    final_score: float
    # Relative AUC change, clamped to [-1, 1].
    overall_improvement: float
    # Mean retention delta at the action-relevant timepoints, clamped to [-1, 1].
    targeted_improvement: float
    # Summed magnitude of worsened timepoints over the number of timepoints.
    regression_penalty: float
    # Timepoints (seconds) whose retention rose by more than 0.001.
    improved_timepoints: List[int]
    # Timepoints (seconds) whose retention fell by more than 0.001.
    worsened_timepoints: List[int]
class RetentionCurveScorer:
    """
    Scores how much a new retention curve improves on an original one.

    Each action type is expected to move a particular part of the curve:
      - hook_rewrite      → early timepoints (0–6s)
      - section_reorder   → mid timepoints (10–20s)
      - cultural_ref_sub  → mid-to-late (15–30s)
      - cta_placement     → late timepoints (45–60s)
    Unknown action types are evaluated over every timepoint.

    Formula (negative improvements are floored at 0 before weighting):
      final = 0.50 * max(0, overall_improvement)
            + 0.35 * max(0, targeted_improvement)
            - 0.15 * regression_penalty
      clipped to [0, 1]
    """

    ACTION_CURVE_MAP = {
        "hook_rewrite":     [0, 3, 6],
        "section_reorder":  [10, 15, 20],
        "cultural_ref_sub": [15, 20, 25, 30],
        "cta_placement":    [45, 60],
    }

    def score(
        self,
        original_curve: RetentionCurve,
        new_curve: RetentionCurve,
        action_type: str,
    ) -> CurveScorerResult:
        """Compare ``new_curve`` against ``original_curve`` for ``action_type``."""
        before = original_curve.values
        after = new_curve.values
        # Only indices present in both curves can be compared.
        usable = min(len(before), len(after))

        # --- 1. Relative change in area under the curve -------------------
        base_auc = original_curve.area_under_curve
        fresh_auc = new_curve.area_under_curve
        if base_auc > 0:
            overall_improvement = (fresh_auc - base_auc) / base_auc
        else:
            overall_improvement = float(fresh_auc)
        overall_improvement = max(-1.0, min(1.0, overall_improvement))

        # --- 2. Mean delta at the timepoints this action should move ------
        focus = self.ACTION_CURVE_MAP.get(str(action_type), CURVE_TIMEPOINTS)
        focus_deltas: List[float] = [
            after[_TP_INDEX[tp]] - before[_TP_INDEX[tp]]
            for tp in focus
            if tp in _TP_INDEX and _TP_INDEX[tp] < usable
        ]
        targeted_improvement = (
            float(sum(focus_deltas) / len(focus_deltas)) if focus_deltas else 0.0
        )
        targeted_improvement = max(-1.0, min(1.0, targeted_improvement))

        # --- 3. Classify every timepoint and collect regressions ----------
        gained: List[int] = []
        lost: List[int] = []
        loss_sizes: List[float] = []
        for tp, pos in _TP_INDEX.items():
            if pos >= usable:
                continue
            change = after[pos] - before[pos]
            if change > 0.001:
                gained.append(tp)
            elif change < -0.001:
                lost.append(tp)
                loss_sizes.append(abs(change))

        if loss_sizes:
            regression_penalty = min(1.0, sum(loss_sizes) / len(CURVE_TIMEPOINTS))
        else:
            regression_penalty = 0.0

        # --- 4. Weighted blend, clipped to the unit interval ---------------
        blended = (
            0.50 * max(0.0, overall_improvement)
            + 0.35 * max(0.0, targeted_improvement)
            - 0.15 * regression_penalty
        )
        blended = max(0.0, min(1.0, blended))

        return CurveScorerResult(
            final_score=round(blended, 4),
            overall_improvement=round(overall_improvement, 4),
            targeted_improvement=round(targeted_improvement, 4),
            regression_penalty=round(regression_penalty, 4),
            improved_timepoints=gained,
            worsened_timepoints=lost,
        )

viral_script_engine/retention/feature_extractor.py ADDED Viewed

	@@ -0,0 +1,190 @@

+import json
+import re
+from pathlib import Path
+from typing import List, Optional
+from pydantic import BaseModel
+from viral_script_engine.platforms.platform_spec import PlatformRegistry
# Lower-case phrases that count as filler when found in the hook
# (matched as plain substrings).
_FILLER_PHRASES = [
    "hey guys", "welcome back", "today i want to", "so today",
    "in this video", "what's up everyone", "hey everyone",
    "guys today", "hello everyone", "so basically", "you know",
    "kind of", "sort of", "basically", "um ", "uh ",
]

# Capitalised words in this set are NOT treated as "specific" tokens.
_COMMON_WORDS = {
    'i', 'the', 'a', 'an', 'my', 'your', 'its', 'it', 'is', 'are',
    'was', 'were', 'be', 'been', "i've", "i'm", "it's", "here's",
    'today', 'and', 'but', 'so', 'that', 'this', 'these', 'those',
    'to', 'of', 'in', 'for', 'on', 'with', 'at', 'by', 'from',
    'or', 'not', 'you', 'we', 'they', 'he', 'she', 'if', 'do',
    'get', 'just', 'up', 'out', 'about', 'what', 'all', 'some',
}

# Regexes (applied to the lower-cased hook) that signal a promise to the viewer.
_PROMISE_PATTERNS = [
    r'\d',
    r'\bhow to\b',
    r'\bwhy\b',
    r'\bwhat happens when\b',
    r'\bi made\b',
    r'\bwill\b',
    r'\bguaranteed\b',
    r'\bstep\b',
    r'\btips?\b',
    r'\bsecrets?\b',
    r'\bprove[sd]?\b',
    r'\bhere\'?s\b',
]

# Order defines the one-hot platform slots at the end of the feature vector.
_KNOWN_PLATFORMS = ["Reels", "Shorts", "Feed", "TikTok"]
class ScriptFeatures(BaseModel):
    """Numeric description of a script used as input to the retention model."""

    # Hook features (predicts early drop-off 0–5s)
    hook_word_count: int
    hook_has_number: bool
    hook_has_question: bool
    hook_has_promise: bool
    hook_filler_score: float        # 0=no filler, 1=all filler
    # Pacing features (predicts mid-video retention 5–30s)
    avg_words_per_sentence: float
    sentence_count: int
    short_sentence_ratio: float     # sentences < 8 words / total sentences
    section_balance_score: float    # how evenly hook:body:cta matches platform spec
    # Content features (predicts late retention 30s+)
    specificity_score: float        # ratio of specific nouns/numbers to total words
    cultural_ref_count: int
    cta_position_ratio: float       # word offset of CTA start / total words
    # Platform fit features
    platform: str
    word_count: int
    length_vs_optimal: float        # word_count / optimal_script_length for platform

    def to_vector(self) -> List[float]:
        """Flatten the features into the fixed-order vector the model consumes.

        The 14 scalar features come first (booleans as 0.0/1.0), followed by a
        one-hot encoding of ``platform`` over ``_KNOWN_PLATFORMS``; an unknown
        platform yields all zeros in the one-hot part.
        """
        scalar_fields = (
            self.hook_word_count,
            self.hook_has_number,
            self.hook_has_question,
            self.hook_has_promise,
            self.hook_filler_score,
            self.avg_words_per_sentence,
            self.sentence_count,
            self.short_sentence_ratio,
            self.section_balance_score,
            self.specificity_score,
            self.cultural_ref_count,
            self.cta_position_ratio,
            self.word_count,
            self.length_vs_optimal,
        )
        vector = [float(value) for value in scalar_fields]
        for known in _KNOWN_PLATFORMS:
            vector.append(1.0 if self.platform == known else 0.0)
        return vector
class FeatureExtractor:
    """Turns raw script text into ``ScriptFeatures`` for a platform and region."""

    def __init__(self, cultural_kb_path: Optional[str] = None):
        self.platform_registry = PlatformRegistry()
        self._cultural_kb_path = cultural_kb_path
        # Loaded lazily on the first extract() call.
        self._cultural_kb: Optional[dict] = None

    def _load_kb(self) -> None:
        """Read the cultural knowledge base from disk once and keep it."""
        if self._cultural_kb is None:
            source = self._cultural_kb_path or str(
                Path(__file__).parent.parent / "data" / "cultural_kb.json"
            )
            with open(source, "r", encoding="utf-8") as handle:
                self._cultural_kb = json.load(handle)

    def extract(self, script: str, platform: str, region: str) -> ScriptFeatures:
        """Compute hook, pacing, content and platform-fit features for ``script``."""
        self._load_kb()
        spec = self.platform_registry.get(platform)

        # Sentence split on terminal punctuation; fall back to the raw script.
        pieces = re.split(r'(?<=[.!?])\s+', script.strip())
        sentences = [piece for piece in pieces if piece.strip()] or [script]
        n = len(sentences)
        tokens = script.split()
        total_words = len(tokens)
        sentence_lengths = [len(sentence.split()) for sentence in sentences]

        # --- Hook: first ~20% of sentences (min 1, max 3) ---
        if n >= 5:
            hook_end = max(1, min(3, int(n * 0.2)))
        else:
            hook_end = max(1, min(2, n))
        hook_text = " ".join(sentences[:hook_end])
        hook_lower = hook_text.lower()
        hook_word_count = len(hook_text.split())

        has_number = re.search(r'\d', hook_text) is not None
        has_question = '?' in hook_text
        has_promise = any(re.search(pattern, hook_lower) for pattern in _PROMISE_PATTERNS)
        filler_hits = sum(1 for phrase in _FILLER_PHRASES if phrase in hook_lower)
        filler_score = min(1.0, filler_hits / max(hook_word_count, 1) * 4)

        # --- Pacing ---
        divisor = max(n, 1)
        avg_len = sum(sentence_lengths) / divisor
        short_ratio = sum(1 for length in sentence_lengths if length < 8) / divisor

        # Section balance: compare actual word distribution to platform spec
        cta_start = max(hook_end + 1, n - max(1, int(n * 0.1)))
        hook_w = sum(sentence_lengths[:hook_end])
        body_w = sum(sentence_lengths[hook_end:cta_start])
        cta_w = sum(sentence_lengths[cta_start:])
        section_total = max(hook_w + body_w + cta_w, 1)

        optimal = spec.optimal_sentences_per_section
        optimal_total = max(sum(optimal.values()), 1)
        hook_gap = abs(hook_w / section_total - optimal.get("hook", 2) / optimal_total)
        body_gap = abs(body_w / section_total - optimal.get("body", 6) / optimal_total)
        balance_dev = (hook_gap + body_gap) / 2
        balance_score = max(0.0, 1.0 - balance_dev * 4)

        # --- Content features ---
        def is_specific(token: str) -> bool:
            # A token is "specific" if it carries a digit, or is capitalised
            # and is not one of the common words once punctuation is stripped.
            if re.search(r'\d', token):
                return True
            if len(token) <= 1 or not token[0].isupper():
                return False
            return token.lower().strip('.,!?;:\'"') not in _COMMON_WORDS

        specific_count = sum(1 for token in tokens if is_specific(token))
        specificity = min(1.0, specific_count / max(total_words, 1))

        cultural_refs = 0
        if self._cultural_kb and region in self._cultural_kb:
            region_kb = self._cultural_kb[region]
            lowered = script.lower()
            ref_hits = sum(
                1 for ref in region_kb.get("valid_refs", []) if ref.lower() in lowered
            )
            idiom_hits = sum(
                1 for idiom in region_kb.get("correct_idioms", []) if idiom.lower() in lowered
            )
            cultural_refs = ref_hits + idiom_hits

        # CTA starts after the hook and body words.
        cta_ratio = (hook_w + body_w) / max(total_words, 1)

        # --- Platform fit ---
        length_ratio = total_words / max(spec.optimal_script_length_words, 1)

        return ScriptFeatures(
            hook_word_count=hook_word_count,
            hook_has_number=has_number,
            hook_has_question=has_question,
            hook_has_promise=has_promise,
            hook_filler_score=round(filler_score, 4),
            avg_words_per_sentence=round(avg_len, 4),
            sentence_count=n,
            short_sentence_ratio=round(short_ratio, 4),
            section_balance_score=round(balance_score, 4),
            specificity_score=round(specificity, 4),
            cultural_ref_count=cultural_refs,
            cta_position_ratio=round(cta_ratio, 4),
            platform=platform,
            word_count=total_words,
            length_vs_optimal=round(length_ratio, 4),
        )

viral_script_engine/retention/model.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4454048ccec161cf3db43cc3b02a2767b261e8f3df23cdab2a6904ec47eb2e00
+size 1876741

viral_script_engine/retention/training_data/__init__.py ADDED Viewed

File without changes

viral_script_engine/retention/training_data/build_dataset.py ADDED Viewed

	@@ -0,0 +1,158 @@

+"""
+Builds retention_dataset.json from rule-based simulation.
+Encoding known relationships between script quality scores and viewer retention:
+  - Hook quality (R1) predicts early drop-off at seconds 0–6
+  - Coherence (R2) predicts mid-video retention at seconds 6–20
+  - Cultural alignment (R3) predicts late retention at seconds 20–60
+Dataset format:
+{
+  "samples": [
+    {
+      "script_id": "train_001",
+      "script_text": "...",
+      "platform": "Reels",
+      "region": "Mumbai Gen Z",
+      "retention_curve": [1.0, 0.95, ...],   # 10 values at seconds [0,3,6,10,15,20,25,30,45,60]
+      "curve_source": "rule_based",
+      "quality_tier": "high" | "medium" | "low"
+    }
+  ]
+}
+"""
+import json
+import random
+from pathlib import Path
+from typing import List, Tuple
# Seconds at which each generated curve is sampled.
_TIMEPOINTS = [0, 3, 6, 10, 15, 20, 25, 30, 45, 60]
# Default output location: next to this module.
_OUTPUT_PATH = Path(__file__).parent / "retention_dataset.json"

# Sampling pools for the platform / region attached to each sample.
_PLATFORMS = ["Reels", "Shorts", "Feed", "TikTok"]
_REGIONS = ["Mumbai Gen Z", "pan_india_english", "delhi_millennial", "bangalore_tech"]

# High-quality templates; {amt} and {pct} are filled in by _pick_script.
_HIGH_SCRIPTS = [
    "Did you know {pct}% of people get this wrong? Here's what actually works. Stop doing what everyone tells you. Use this one simple method instead. The results will genuinely surprise you. Comment 'yes' if you want the full breakdown.",
    "I made {amt}k in 30 days using this strategy. Nobody in your feed is talking about this. Here's exactly what I did step by step. You can start tonight with zero investment. Follow for the full guide.",
    "Your phone is lying to you about money. Here's the truth about compound interest that banks don't want you to know. Start with just $50. Watch what happens after 12 months. This changed everything for me.",
    "Stop scrolling. This is the {amt}-second trick that saved me {pct}% on every bill. I tested it for 3 months. Here's the proof. Save this before it gets taken down.",
    "Why do {pct}% of people fail at saving money? I spent 6 months finding out. The answer surprised me. It has nothing to do with income. Watch till the end for the fix.",
    "The {amt} investing mistake I made at 22 cost me {pct}k. Here's what I wish someone told me. Three rules that actually work. No BS, no courses to sell. Just what changed my life.",
    "How to pay off debt {pct}% faster using the avalanche method. Most people use the wrong strategy. This is the math-backed approach. Takes 5 minutes to set up. Start today.",
    "This bank trick gives you {pct}% more interest — your bank doesn't advertise it. Took me {amt} months to find it. Here's exactly how to set it up in under 2 minutes.",
]

# Medium-quality scripts: coherent but generic, no placeholders.
_MED_SCRIPTS = [
    "So today I want to talk about something that I think is really important for a lot of people. Financial planning is something that many people overlook. You should really try to save money regularly if you can. It makes a big difference over time when you think about it.",
    "Hey everyone, welcome back to my channel. Today I'm sharing some tips about managing your finances better. These tips have helped me personally and I hope they help you too. Let me know in the comments what you think about them.",
    "Saving money is actually not that hard once you get into the habit. The main thing is consistency. Try to set aside a fixed amount each month. Over time it really does add up significantly. There are several ways you can approach this.",
    "I've been thinking a lot about financial health lately. It's something that affects everyone. The basics are pretty simple when you break them down. Budget, save, invest — in that order. Most people skip the middle step which is a mistake.",
    "Money management is a skill that anyone can learn. It takes time and practice but it's worth it. Start by tracking your spending for one month. Then identify areas where you can cut back. After that you can start building your savings.",
]

# Low-quality scripts: filler-heavy and vague, no placeholders.
_LOW_SCRIPTS = [
    "Hello guys welcome back um so today basically I wanted to kind of talk about you know like finances and stuff. So basically what I mean is um you should save more money I guess. That's kind of the main point I think. Um yeah so basically just try to do that.",
    "Hey everyone so basically today's video is about money and financial things. I mean you know like it's really important and stuff like that. So yeah basically just save money I guess. Um anyway thanks for watching and stuff.",
    "So um welcome back to my channel. Today I kind of want to sort of discuss um financial things you know. Like basically everyone knows they should save money right. Um so yeah that's basically it I think. Like just try to be better with money or whatever.",
    "Hey guys so um today we're going to talk about kind of like money and finances and all that stuff. So basically um the thing is you know it's pretty important I think. Like I don't know just try to save more I guess. Um yeah so basically that's the main thing.",
    "Welcome back everyone so today basically I wanted to kind of share some thoughts on um financial stuff. Like you know it's important and everything. So basically just try to you know manage your money better or something like that. Um yeah I hope that helps.",
]
def _pick_script(quality: str) -> str:
    """Return a random script for ``quality`` with placeholders filled in.

    ``"high"`` and ``"medium"`` select their own pools; any other value uses
    the low-quality pool. The two random numbers are always drawn first so
    the sequence of RNG calls does not depend on the tier.
    """
    amt = random.randint(10, 99)
    pct = random.randint(60, 98)
    pools = {"high": _HIGH_SCRIPTS, "medium": _MED_SCRIPTS}
    chosen = random.choice(pools.get(quality, _LOW_SCRIPTS))
    return chosen.format(amt=amt, pct=pct)
def _quality_to_scores(quality: str) -> Tuple[float, float, float]:
    """Draw simulated (r1, r2, r3) quality scores for a tier.

    Each score is sampled uniformly from a tier-specific range; any tier
    other than ``"high"`` or ``"medium"`` is treated as low quality.
    """
    if quality == "high":
        ranges = ((0.78, 1.0), (0.72, 1.0), (0.68, 1.0))
    elif quality == "medium":
        ranges = ((0.38, 0.65), (0.38, 0.65), (0.38, 0.65))
    else:
        ranges = ((0.05, 0.25), (0.08, 0.28), (0.08, 0.28))
    # Drawn in r1, r2, r3 order so seeded runs stay reproducible.
    r1, r2, r3 = [random.uniform(low, high) for low, high in ranges]
    return r1, r2, r3
def _generate_curve(r1: float, r2: float, r3: float) -> List[float]:
    """
    Simulate a retention curve at timepoints [0, 3, 6, 10, 15, 20, 25, 30, 45, 60].

    Rules from phase spec (small uniform noise is added to most steps):
      s0  = 1.0
      s3  = 1.0 - (0.4 * (1 - r1))        # hook quality predicts early drop
      s10 = prev - (0.1 * (1 - r2))        # coherence predicts mid-video
      s20 = prev - (0.15 * (1 - r3))       # cultural alignment predicts late
      s60 = prev - 0.05                    # natural decay always present
    Values are floored at 0, forced to be non-increasing and rounded to
    3 decimals.
    """
    curve = [1.0]
    # 3s: the hook-driven drop is the only step whose noise can be positive.
    curve.append(max(0.0, 1.0 - (0.4 * (1 - r1)) + random.uniform(-0.02, 0.02)))

    # Each remaining noisy step: (fixed drop, noise low, noise high).
    steps = (
        (0.0, 0.02, 0.06),                # 6s
        (0.1 * (1 - r2), 0.0, 0.03),      # 10s: coherence
        (0.0, 0.03, 0.07),                # 15s
        (0.15 * (1 - r3), 0.0, 0.03),     # 20s: cultural alignment
        (0.0, 0.02, 0.05),                # 25s
        (0.0, 0.02, 0.05),                # 30s
        (0.05, 0.0, 0.04),                # 45s: natural decay
    )
    for drop, low, high in steps:
        curve.append(max(0.0, curve[-1] - drop - random.uniform(low, high)))

    # 60s: fixed decay, deliberately without a random draw.
    curve.append(max(0.0, curve[-1] - 0.05))

    # enforce monotonic decrease
    ceiling = curve[0]
    capped = []
    for value in curve:
        ceiling = min(value, ceiling)
        capped.append(round(ceiling, 3))
    return capped
def build(output_path: str = None, seed: int = 42) -> str:
    """Generate the 150-sample rule-based dataset and write it as JSON.

    Seeds the module-level ``random`` generator with ``seed``, produces 50
    samples for each of the high / medium / low tiers, and writes them under
    the ``"samples"`` key. Returns the path written, as a string.
    """
    random.seed(seed)
    target = Path(output_path) if output_path else _OUTPUT_PATH
    target.parent.mkdir(parents=True, exist_ok=True)

    # Expand the per-tier counts into one flat sequence of tiers.
    tier_plan = (("high", 50), ("medium", 50), ("low", 50))
    tiers = [tier for tier, count in tier_plan for _ in range(count)]

    samples = []
    for number, quality in enumerate(tiers, start=1):
        # The order of these draws determines the dataset for a given seed.
        platform = random.choice(_PLATFORMS)
        region = random.choice(_REGIONS)
        r1, r2, r3 = _quality_to_scores(quality)
        curve = _generate_curve(r1, r2, r3)
        script_text = _pick_script(quality)
        samples.append({
            "script_id": f"train_{number:03d}",
            "script_text": script_text,
            "platform": platform,
            "region": region,
            "retention_curve": curve,
            "curve_source": "rule_based",
            "quality_tier": quality,
            "r1_score": round(r1, 3),
            "r2_score": round(r2, 3),
            "r3_score": round(r3, 3),
        })

    with open(target, "w", encoding="utf-8") as handle:
        json.dump({"samples": samples}, handle, indent=2)
    return str(target)
# Script entry point: regenerate the dataset at the default location.
if __name__ == "__main__":
    written_to = build()
    print(f"Dataset built: {written_to} (150 samples)")

viral_script_engine/retention/training_data/retention_dataset.json ADDED Viewed

The diff for this file is too large to render. See raw diff

viral_script_engine/rewards/r10_retention_curve.py ADDED Viewed

	@@ -0,0 +1,65 @@

+from typing import Optional
+from pydantic import BaseModel
+from viral_script_engine.retention.curve_predictor import RetentionCurve, RetentionCurvePredictor
+from viral_script_engine.retention.curve_scorer import CurveScorerResult, RetentionCurveScorer
+from viral_script_engine.retention.feature_extractor import FeatureExtractor
class RetentionRewardResult(BaseModel):
    """R10 reward value together with the curves it was derived from."""

    # Final reward, taken from the scorer's ``final_score``.
    score: float
    # Predicted curve for the episode's original script.
    original_curve: RetentionCurve
    # Predicted curve for the rewritten script.
    new_curve: RetentionCurve
    # Full scorer breakdown of the comparison between the two curves.
    curve_delta: CurveScorerResult
class RetentionCurveReward:
    """
    Wraps the full retention prediction + scoring pipeline into a reward signal.
    Caches the original curve per episode so the extractor is called only once
    for the original script — subsequent steps reuse the cached curve.
    """

    def __init__(self, cultural_kb_path: Optional[str] = None):
        self.extractor = FeatureExtractor(cultural_kb_path=cultural_kb_path)
        self.predictor = RetentionCurvePredictor()
        self.scorer = RetentionCurveScorer()
        # episode_id -> predicted curve of that episode's original script.
        self._original_curve_cache: dict = {}

    def score(
        self,
        original_script: str,
        rewritten_script: str,
        platform: str,
        region: str,
        action_type: str,
        episode_id: str,
    ) -> "RetentionRewardResult":
        """Score ``rewritten_script`` against the episode's original script.

        Args:
            original_script: Script the episode started from; only used the
                first time ``episode_id`` is seen.
            rewritten_script: Script produced by the current step.
            platform: Platform name forwarded to the feature extractor.
            region: Region key forwarded to the feature extractor.
            action_type: Action taken; selects the scorer's target timepoints.
            episode_id: Cache key for the original script's curve.

        Returns:
            The reward with both curves and the scorer breakdown. Errors from
            the extractor or predictor (e.g. an untrained model) propagate.
        """
        # Cache original curve — compute only once per episode
        if episode_id not in self._original_curve_cache:
            orig_features = self.extractor.extract(original_script, platform, region)
            self._original_curve_cache[episode_id] = self.predictor.predict(orig_features)
        original_curve = self._original_curve_cache[episode_id]

        new_features = self.extractor.extract(rewritten_script, platform, region)
        new_curve = self.predictor.predict(new_features)
        result = self.scorer.score(
            original_curve=original_curve,
            new_curve=new_curve,
            action_type=action_type,
        )
        return RetentionRewardResult(
            score=result.final_score,
            original_curve=original_curve,
            new_curve=new_curve,
            curve_delta=result,
        )

    def clear_cache(self, episode_id: Optional[str] = None) -> None:
        """Drop one episode's cached curve, or every entry when no id is given.

        Only ``None`` means "clear everything": a falsy but real id such as
        ``""`` removes just that entry instead of wiping the whole cache.
        Unknown ids are ignored.
        """
        if episode_id is not None:
            self._original_curve_cache.pop(episode_id, None)
        else:
            self._original_curve_cache.clear()

viral_script_engine/rewards/reward_aggregator.py CHANGED Viewed

@@ -12,6 +12,7 @@ _COMPONENT_FIELDS = [
     "r1_hook_strength", "r2_coherence", "r3_cultural_alignment",
     "r4_debate_resolution", "r5_defender_preservation",
     "r6_safety", "r7_originality", "r8_persona_fit", "r9_platform_pacing",
 ]
 _DROP_THRESHOLD = 0.25

     "r1_hook_strength", "r2_coherence", "r3_cultural_alignment",
     "r4_debate_resolution", "r5_defender_preservation",
     "r6_safety", "r7_originality", "r8_persona_fit", "r9_platform_pacing",
+    "r10_retention_curve",
 ]
 _DROP_THRESHOLD = 0.25

viral_script_engine/tests/test_phase12.py ADDED Viewed

	@@ -0,0 +1,325 @@

+"""Phase 12 tests — Retention Curve Simulator."""
+import json
+import sys
+import tempfile
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+import numpy as np
+import pytest
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+from viral_script_engine.retention.feature_extractor import (
+    FeatureExtractor,
+    ScriptFeatures,
+    _KNOWN_PLATFORMS,
+)
+from viral_script_engine.retention.curve_predictor import (
+    RetentionCurve,
+    RetentionCurvePredictor,
+    CURVE_TIMEPOINTS,
+)
+from viral_script_engine.retention.curve_scorer import RetentionCurveScorer
+from viral_script_engine.rewards.r10_retention_curve import RetentionCurveReward
+# Fixture files, resolved relative to this test module: two directories up
+# from this file, then into "data/".
+_SCRIPTS_PATH = str(
+    Path(__file__).parent.parent / "data" / "test_scripts" / "scripts.json"
+)
+_CULTURAL_KB_PATH = str(
+    Path(__file__).parent.parent / "data" / "cultural_kb.json"
+)
+# Sample script used as the "strong" input. Its opening contains a number
+# ("80%") and a question mark; the hook-feature assertions below rely on both.
+_GOOD_SCRIPT = (
+    "Did you know 80% of people get this wrong? Here's what actually works. "
+    "Stop doing what everyone tells you. Use this one simple method instead. "
+    "The results will surprise you. Follow for more."
+)
+# Sample script used as the "weak" input: a rambling greeting padded with
+# words such as "um", "basically", "kind of" and "you know".
+_BAD_SCRIPT = (
+    "Hello guys welcome back um so today basically I wanted to kind of talk "
+    "about you know like finances and stuff. So basically just try to save money."
+)
+# ---------------------------------------------------------------------------
+# FeatureExtractor tests
+# ---------------------------------------------------------------------------
+def test_feature_extractor_produces_correct_features():
+    extractor = FeatureExtractor(cultural_kb_path=_CULTURAL_KB_PATH)
+    features = extractor.extract(_GOOD_SCRIPT, platform="Reels", region="pan_india_english")
+    assert isinstance(features, ScriptFeatures)
+    assert features.hook_word_count > 0
+    assert features.sentence_count > 0
+    assert features.word_count > 0
+    assert features.platform == "Reels"
+    assert features.hook_has_number is True  # "80%"
+    assert features.hook_has_question is True  # "?"
+def test_feature_extractor_bad_script_has_high_filler():
+    extractor = FeatureExtractor(cultural_kb_path=_CULTURAL_KB_PATH)
+    features = extractor.extract(_BAD_SCRIPT, platform="Reels", region="pan_india_english")
+    # Bad script should have higher filler score than good script
+    good_features = extractor.extract(_GOOD_SCRIPT, platform="Reels", region="pan_india_english")
+    assert features.hook_filler_score >= good_features.hook_filler_score
+def test_to_vector_returns_flat_numeric_list():
+    extractor = FeatureExtractor(cultural_kb_path=_CULTURAL_KB_PATH)
+    features = extractor.extract(_GOOD_SCRIPT, platform="Reels", region="pan_india_english")
+    vec = features.to_vector()
+    assert isinstance(vec, list)
+    assert len(vec) > 0
+    # No NaN values
+    for v in vec:
+        assert v == v, f"NaN found in vector: {vec}"
+    # All values are floats
+    for v in vec:
+        assert isinstance(v, (int, float))
+def test_to_vector_platform_one_hot():
+    extractor = FeatureExtractor(cultural_kb_path=_CULTURAL_KB_PATH)
+    for platform in _KNOWN_PLATFORMS:
+        features = extractor.extract(_GOOD_SCRIPT, platform=platform, region="pan_india_english")
+        vec = features.to_vector()
+        # Last N elements are one-hot platform encoding
+        platform_slice = vec[-len(_KNOWN_PLATFORMS):]
+        assert sum(platform_slice) == 1.0, f"One-hot sum should be 1 for {platform}"
+        assert max(platform_slice) == 1.0
+def test_to_vector_no_nan_for_bad_script():
+    extractor = FeatureExtractor(cultural_kb_path=_CULTURAL_KB_PATH)
+    features = extractor.extract(_BAD_SCRIPT, platform="TikTok", region="pan_india_english")
+    vec = features.to_vector()
+    for v in vec:
+        assert v == v, f"NaN found in vector"
+# ---------------------------------------------------------------------------
+# RetentionCurvePredictor tests
+# ---------------------------------------------------------------------------
+def test_predictor_raises_if_not_trained():
+    predictor = RetentionCurvePredictor.__new__(RetentionCurvePredictor)
+    predictor.model = None
+    predictor._trained = False
+    extractor = FeatureExtractor(cultural_kb_path=_CULTURAL_KB_PATH)
+    features = extractor.extract(_GOOD_SCRIPT, platform="Reels", region="pan_india_english")
+    with pytest.raises(RuntimeError, match="not trained"):
+        predictor.predict(features)
+def _make_trained_predictor() -> RetentionCurvePredictor:
+    """Train predictor on a minimal in-memory dataset."""
+    from sklearn.ensemble import GradientBoostingRegressor
+    from sklearn.multioutput import MultiOutputRegressor
+    import numpy as np
+    extractor = FeatureExtractor(cultural_kb_path=_CULTURAL_KB_PATH)
+    scripts = [_GOOD_SCRIPT, _BAD_SCRIPT] * 10
+    platforms = ["Reels", "TikTok", "Shorts", "Feed"] * 5
+    X, y = [], []
+    for i, (sc, pl) in enumerate(zip(scripts, platforms)):
+        feat = extractor.extract(sc, platform=pl, region="pan_india_english")
+        X.append(feat.to_vector())
+        quality = 1.0 if sc == _GOOD_SCRIPT else 0.3
+        curve = [max(0.0, quality - j * 0.05) for j in range(len(CURVE_TIMEPOINTS))]
+        y.append(curve)
+    model = MultiOutputRegressor(
+        GradientBoostingRegressor(n_estimators=10, max_depth=2, random_state=42)
+    )
+    model.fit(np.array(X), np.array(y))
+    predictor = RetentionCurvePredictor.__new__(RetentionCurvePredictor)
+    predictor.model = model
+    predictor._trained = True
+    return predictor
+def test_predicted_curve_is_monotonically_non_increasing():
+    predictor = _make_trained_predictor()
+    extractor = FeatureExtractor(cultural_kb_path=_CULTURAL_KB_PATH)
+    features = extractor.extract(_GOOD_SCRIPT, platform="Reels", region="pan_india_english")
+    curve = predictor.predict(features)
+    for i in range(1, len(curve.values)):
+        assert curve.values[i] <= curve.values[i - 1] + 1e-9, (
+            f"Curve not monotonic at index {i}: {curve.values[i - 1]} -> {curve.values[i]}"
+        )
+def test_predicted_curve_values_in_range():
+    predictor = _make_trained_predictor()
+    extractor = FeatureExtractor(cultural_kb_path=_CULTURAL_KB_PATH)
+    features = extractor.extract(_BAD_SCRIPT, platform="TikTok", region="pan_india_english")
+    curve = predictor.predict(features)
+    for v in curve.values:
+        assert 0.0 <= v <= 1.0, f"Value {v} out of [0, 1]"
+def test_predicted_curve_has_correct_timepoints():
+    predictor = _make_trained_predictor()
+    extractor = FeatureExtractor(cultural_kb_path=_CULTURAL_KB_PATH)
+    features = extractor.extract(_GOOD_SCRIPT, platform="Reels", region="pan_india_english")
+    curve = predictor.predict(features)
+    assert curve.timepoints == CURVE_TIMEPOINTS
+    assert len(curve.values) == len(CURVE_TIMEPOINTS)
+# ---------------------------------------------------------------------------
+# RetentionCurveScorer tests
+# ---------------------------------------------------------------------------
+def _make_curve(values: list) -> RetentionCurve:
+    return RetentionCurve.from_values(values)
+def test_scorer_rewards_targeted_improvement():
+    scorer = RetentionCurveScorer()
+    # hook_rewrite targets [0, 3, 6] — improve those timepoints
+    orig_values = [1.0, 0.6, 0.5, 0.45, 0.42, 0.40, 0.38, 0.36, 0.32, 0.30]
+    new_values  = [1.0, 0.85, 0.75, 0.45, 0.42, 0.40, 0.38, 0.36, 0.32, 0.30]
+    result = scorer.score(
+        original_curve=_make_curve(orig_values),
+        new_curve=_make_curve(new_values),
+        action_type="hook_rewrite",
+    )
+    assert result.final_score > 0
+    assert result.targeted_improvement > 0
+    assert 3 in result.improved_timepoints or 6 in result.improved_timepoints
+def test_scorer_applies_regression_penalty_for_worsening():
+    scorer = RetentionCurveScorer()
+    orig_values = [1.0, 0.9, 0.8, 0.7, 0.65, 0.60, 0.55, 0.50, 0.45, 0.40]
+    # Worsen the mid-video section
+    new_values  = [1.0, 0.9, 0.8, 0.5, 0.45, 0.40, 0.55, 0.50, 0.45, 0.40]
+    result = scorer.score(
+        original_curve=_make_curve(orig_values),
+        new_curve=_make_curve(new_values),
+        action_type="hook_rewrite",
+    )
+    assert result.regression_penalty > 0
+    assert len(result.worsened_timepoints) > 0
+def test_scorer_score_in_range():
+    scorer = RetentionCurveScorer()
+    orig_values = [1.0, 0.8, 0.7, 0.6, 0.55, 0.50, 0.46, 0.42, 0.38, 0.35]
+    new_values  = [1.0, 0.85, 0.75, 0.65, 0.60, 0.55, 0.50, 0.46, 0.42, 0.38]
+    result = scorer.score(
+        original_curve=_make_curve(orig_values),
+        new_curve=_make_curve(new_values),
+        action_type="section_reorder",
+    )
+    assert 0.0 <= result.final_score <= 1.0
+# ---------------------------------------------------------------------------
+# RetentionCurveReward — cache test
+# ---------------------------------------------------------------------------
+def test_retention_reward_caches_original_curve():
+    """FeatureExtractor.extract should be called only once for the original script per episode."""
+    predictor = _make_trained_predictor()
+    reward = RetentionCurveReward.__new__(RetentionCurveReward)
+    reward.extractor = FeatureExtractor(cultural_kb_path=_CULTURAL_KB_PATH)
+    reward.predictor = predictor
+    reward.scorer = RetentionCurveScorer()
+    reward._original_curve_cache = {}
+    call_count = {"n": 0}
+    original_extract = reward.extractor.extract
+    def counting_extract(script, platform, region):
+        call_count["n"] += 1
+        return original_extract(script, platform, region)
+    reward.extractor.extract = counting_extract
+    episode_id = "ep_cache_test"
+    for _ in range(3):
+        reward.score(
+            original_script=_GOOD_SCRIPT,
+            rewritten_script=_BAD_SCRIPT,
+            platform="Reels",
+            region="pan_india_english",
+            action_type="hook_rewrite",
+            episode_id=episode_id,
+        )
+    # extract called for original once + rewritten on every call = 1 + 3 = 4
+    # original is cached after first call → only 1 for original, 3 for rewritten = 4 total
+    assert call_count["n"] == 4, (
+        f"Expected 4 extract calls (1 original cached + 3 rewritten), got {call_count['n']}"
+    )
+# ---------------------------------------------------------------------------
+# env.step includes r10 in reward components
+# ---------------------------------------------------------------------------
+def test_env_step_includes_r10_when_model_trained():
+    """env.step() should include r10_retention_curve in reward components when model is trained."""
+    from viral_script_engine.environment.env import ViralScriptEnv
+    from unittest.mock import MagicMock
+    env = ViralScriptEnv(
+        scripts_path=_SCRIPTS_PATH,
+        cultural_kb_path=_CULTURAL_KB_PATH,
+        difficulty="easy",
+        use_escalation=False,
+        use_anti_gaming=False,
+    )
+    # Inject trained predictor
+    predictor = _make_trained_predictor()
+    env.r10.predictor = predictor
+    obs, _ = env.reset()
+    mock_critique = MagicMock()
+    mock_critique.claims = []
+    mock_critique.overall_severity = "low"
+    mock_defender = MagicMock()
+    mock_defender.core_strength = "Strong hook"
+    mock_defender.core_strength_quote = "Test quote"
+    mock_defender.defense_argument = "Good"
+    mock_defender.flagged_critic_claims = []
+    mock_defender.regional_voice_elements = []
+    mock_defender.model_dump.return_value = {}
+    mock_rewrite = MagicMock()
+    mock_rewrite.rewritten_script = obs["current_script"]
+    mock_rewrite.diff = ""
+    with patch.object(env.critic, "critique", return_value=mock_critique), \
+         patch.object(env.defender, "defend", return_value=mock_defender), \
+         patch.object(env.rewriter, "rewrite", return_value=mock_rewrite):
+        _, _, _, _, info = env.step({
+            "action_type": "hook_rewrite",
+            "target_section": "hook",
+            "instruction": "Strengthen the hook.",
+            "critique_claim_id": "C1",
+            "reasoning": "test",
+        })
+    rc = info["reward_components"]
+    assert "r10_retention_curve" in rc
+    assert rc["r10_retention_curve"] is not None
+    assert 0.0 <= rc["r10_retention_curve"] <= 1.0