"""Tests for script parsing & sanitization logic.

These tests verify two things VibeVoice users care about:

1. Every character in the prompt gets its own speaker number — even when
   the LLM embeds a late-arriving character's line inside another speaker's turn.
2. Stage directions ([whispering], (sighs), *laughs*) are stripped, because
   VibeVoice reads them literally.

Run:
    python -m pytest tests/
    # or:
    python tests/test_script_parser.py
"""
import os
import sys
import unittest

# Put the parent of this file's directory (the repo root) at the front of
# sys.path so `python tests/test_script_parser.py` works from the repo root.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Stub HF_TOKEN so importing app.py doesn't complain. setdefault leaves any
# HF_TOKEN that is already set in the environment untouched.
os.environ.setdefault("HF_TOKEN", "test-token-placeholder")
# Import the functions under test. An alternative would be to read app.py only
# up to its Modal-connection section, but a normal import is simpler: per the
# original note, a Modal-connection failure is caught inside app.py itself.
from app import parse_script_to_turns, sanitize_dialogue, turns_to_script
class TestSanitizeDialogue(unittest.TestCase):
    """Checks on what sanitize_dialogue removes from a line and what it keeps."""

    def test_strips_bracketed_stage_directions(self):
        """Square-bracketed cues are expected to vanish, leading or inline."""
        leading_cue = sanitize_dialogue("[whispering] Come closer, my child.")
        self.assertEqual(leading_cue, "Come closer, my child.")

        inline_cue = sanitize_dialogue("Ugh [door slams] she's here.")
        self.assertEqual(inline_cue, "Ugh she's here.")

    def test_strips_asterisk_actions(self):
        """An action wrapped in asterisks is expected to be removed."""
        cleaned = sanitize_dialogue("*laughs* Oh man, that's wild!")
        self.assertEqual(cleaned, "Oh man, that's wild!")

    def test_strips_paren_emotion_cues(self):
        """Short parenthesised emotion cues are expected to be removed."""
        leading_cue = sanitize_dialogue("(softly) Mom is coming!")
        self.assertEqual(leading_cue, "Mom is coming!")

        inline_cue = sanitize_dialogue(
            "I can't believe it (sighs) you really did it."
        )
        self.assertEqual(inline_cue, "I can't believe it you really did it.")

    def test_preserves_legitimate_asides(self):
        """A genuine parenthetical aside must come back unchanged."""
        # Real parenthetical asides should NOT be stripped
        sentence = "The spell (which took years to learn) is incredible."
        self.assertEqual(sanitize_dialogue(sentence), sentence)

    def test_preserves_inline_emotion_words(self):
        """Spoken interjections are dialogue, not stage directions."""
        # "Hahaha", "Ugh", "Whoa" — these are fine as real dialogue
        sentence = "Hahaha you wish, Orc!"
        self.assertEqual(sanitize_dialogue(sentence), sentence)
class TestParseScriptToTurns(unittest.TestCase):
    """Checks on how parse_script_to_turns splits a script into speaker turns."""

    def test_basic_two_speaker_script(self):
        """Three 'Speaker N:' lines yield three turns in script order."""
        script = "\n".join(
            [
                "Speaker 1: Hello there.",
                "Speaker 2: General Kenobi.",
                "Speaker 1: You are a bold one.",
            ]
        )
        turns = parse_script_to_turns(script)

        self.assertEqual(len(turns), 3)
        expected_turns = [
            {"speaker": 1, "text": "Hello there."},
            {"speaker": 2, "text": "General Kenobi."},
            {"speaker": 1, "text": "You are a bold one."},
        ]
        for position, expected in enumerate(expected_turns):
            self.assertEqual(turns[position], expected)

    def test_detects_inline_character_tag_as_new_speaker(self):
        """Regression: LLM embeds 'Mom:' inside Speaker 1's turn.

        Parser should split it out and assign Mom her own speaker number.
        """
        paragraphs = [
            "Speaker 1: We need magic, pure and simple. "
            "Mom: Hey kids! What's all this racket down here?",
            "Speaker 2: Oh hi Mom!",
        ]
        turns = parse_script_to_turns("\n\n".join(paragraphs))
        speakers = {turn["speaker"] for turn in turns}

        self.assertEqual(len(turns), 3)
        self.assertEqual(speakers, {1, 2, 3})  # Mom becomes Speaker 3
        self.assertEqual(turns[0]["text"], "We need magic, pure and simple.")

        mom_turn = turns[1]
        self.assertIn("What's all this racket", mom_turn["text"])
        self.assertEqual(mom_turn["speaker"], 3)

    def test_named_characters_only(self):
        """Pure named-character script (no 'Speaker N:') should still parse."""
        lines = [
            "Wizard: I'll cast Meteor Swarm.",
            "Orc: Bah! Swords are better.",
            "Mom: Dinner's ready!",
        ]
        turns = parse_script_to_turns("\n\n".join(lines))

        self.assertEqual(len(turns), 3)
        # Each unique name -> unique speaker number, assigned in order:
        # Wizard, Orc, Mom.
        for position, expected_number in enumerate((1, 2, 3)):
            self.assertEqual(turns[position]["speaker"], expected_number)

    def test_same_character_keeps_same_speaker_number(self):
        """A returning character reuses the number from their first turn."""
        lines = [
            "Wizard: First line.",
            "Orc: Second line.",
            "Wizard: Third line — wizard again.",
        ]
        turns = parse_script_to_turns("\n\n".join(lines))

        wizard_first, wizard_again = turns[0]["speaker"], turns[2]["speaker"]
        self.assertEqual(wizard_first, wizard_again)
        self.assertNotEqual(wizard_first, turns[1]["speaker"])

    def test_caps_at_four_speakers(self):
        """Parse a five-speaker script and bound the highest speaker number.

        NOTE(review): despite the method name, this only asserts that the
        highest speaker number is at most 5. The original inline comments
        disagreed on whether 'Speaker 5' is capped to 4 or preserved by the
        parser — confirm the intended behavior and tighten the assertion.
        """
        words = ("One", "Two", "Three", "Four", "Five")
        script = "\n\n".join(
            f"Speaker {number}: {word}."
            for number, word in enumerate(words, start=1)
        )
        turns = parse_script_to_turns(script)

        highest = max(turn["speaker"] for turn in turns)
        self.assertLessEqual(highest, 5)  # parser preserves Speaker N numbers

    def test_ignores_title_label(self):
        """A leading 'Title:' line must not be counted as a turn."""
        turns = parse_script_to_turns("Title: My Great Script\n\nSpeaker 1: Hello.")

        self.assertEqual(len(turns), 1)
        self.assertEqual(turns[0]["speaker"], 1)

    def test_empty_script(self):
        """Empty and whitespace-only scripts both parse to an empty list."""
        for blank_script in ("", " \n\n "):
            self.assertEqual(parse_script_to_turns(blank_script), [])

    def test_plain_text_becomes_speaker_1(self):
        """Unlabelled text is treated as a single turn by speaker 1."""
        monologue = "Just some monologue with no labels."
        turns = parse_script_to_turns(monologue)

        self.assertEqual(len(turns), 1)
        self.assertEqual(turns[0]["speaker"], 1)
class TestIntegration(unittest.TestCase):
    """End-to-end: dirty LLM output -> parsed and sanitized turns."""

    def test_wizard_orc_mom_scenario(self):
        """The exact failure case the user reported."""
        first_paragraph = (
            "Speaker 1: Oh come on, Orc, you're exaggerating. (laughs) "
            "We need magic, pure and simple. "
            "Mom: Hey there, you two! What's all this racket down here?"
        )
        second_paragraph = (
            "Speaker 2: [sighs] Yeah, Mom, Wizard wants to use Wall of Force. "
            "Mom: Oh boy, you guys really are getting carried away."
        )
        dirty_script = "\n\n".join([first_paragraph, second_paragraph])

        # Sanitize each parsed turn and keep only those with text left over.
        turns = []
        for parsed in parse_script_to_turns(dirty_script):
            speaker = parsed["speaker"]
            cleaned_text = sanitize_dialogue(parsed["text"])
            if cleaned_text:
                turns.append({"speaker": speaker, "text": cleaned_text})

        speakers = {turn["speaker"] for turn in turns}
        self.assertEqual(len(speakers), 3, f"Expected 3 speakers, got {speakers}: {turns}")

        # No stage directions survived, and the Mom tag was extracted into its
        # own turn rather than left inside someone else's text.
        all_text = " ".join(turn["text"] for turn in turns)
        for leftover in ("[sighs]", "(laughs)", "Mom:"):
            self.assertNotIn(leftover, all_text)

    def test_round_trip_preserves_structure(self):
        """Rendering turns to a script and re-parsing gives back the same turns."""
        original_turns = [
            {"speaker": speaker, "text": text}
            for speaker, text in (
                (1, "First thing."),
                (2, "Second thing."),
                (1, "Back to me."),
            )
        ]

        reparsed = parse_script_to_turns(turns_to_script(original_turns))
        self.assertEqual(original_turns, reparsed)
# Allow running this file directly; verbosity=2 prints one line per test.
if __name__ == "__main__":
    unittest.main(verbosity=2)
|