| """Tests for script parsing & sanitization logic. |
| |
| These tests verify two things VibeVoice users care about: |
| 1. Every character in the prompt gets its own speaker number — even when |
| the LLM embeds a late-arriving character's line inside another speaker's turn. |
| 2. Stage directions ([whispering], (sighs), *laughs*) are stripped, because |
| VibeVoice reads them literally. |
| |
| Run: |
| python -m pytest tests/ |
| # or: |
| python tests/test_script_parser.py |
| """ |
| import os |
| import sys |
| import unittest |
|
|
| |
# Put the parent of the directory containing this file on sys.path so that
# `from app import ...` below resolves even when this file is run directly.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Supply a placeholder HF_TOKEN unless the environment already defines one.
# NOTE(review): presumably `app` reads HF_TOKEN at import time — confirm.
os.environ.setdefault("HF_TOKEN", "test-token-placeholder")

# Deliberately imported after the path/environment setup above, which is why
# this import is not grouped with the standard-library imports.
from app import parse_script_to_turns, sanitize_dialogue, turns_to_script
|
|
|
|
class TestSanitizeDialogue(unittest.TestCase):
    """Checks which markup `sanitize_dialogue` removes and which text it keeps."""

    def _check_cases(self, cases):
        """Assert `sanitize_dialogue(raw) == expected` for each (raw, expected) pair."""
        for raw, expected in cases:
            with self.subTest(raw=raw):
                self.assertEqual(sanitize_dialogue(raw), expected)

    def test_strips_bracketed_stage_directions(self):
        """Square-bracketed directions are removed, leading or mid-sentence."""
        self._check_cases(
            [
                ("[whispering] Come closer, my child.", "Come closer, my child."),
                ("Ugh [door slams] she's here.", "Ugh she's here."),
            ]
        )

    def test_strips_asterisk_actions(self):
        """An action wrapped in asterisks is removed."""
        cleaned = sanitize_dialogue("*laughs* Oh man, that's wild!")
        self.assertEqual(cleaned, "Oh man, that's wild!")

    def test_strips_paren_emotion_cues(self):
        """Short parenthesised emotion cues are removed, leading or mid-sentence."""
        self._check_cases(
            [
                ("(softly) Mom is coming!", "Mom is coming!"),
                (
                    "I can't believe it (sighs) you really did it.",
                    "I can't believe it you really did it.",
                ),
            ]
        )

    def test_preserves_legitimate_asides(self):
        """A parenthetical that is part of the spoken sentence comes back unchanged."""
        sentence = "The spell (which took years to learn) is incredible."
        self.assertEqual(sanitize_dialogue(sentence), sentence)

    def test_preserves_inline_emotion_words(self):
        """Emotion written as plain words (no brackets/asterisks) is left alone."""
        sentence = "Hahaha you wish, Orc!"
        self.assertEqual(sanitize_dialogue(sentence), sentence)
|
|
|
|
class TestParseScriptToTurns(unittest.TestCase):
    """Checks how `parse_script_to_turns` splits a script into speaker turns.

    Each turn is expected to be a dict with an integer ``"speaker"`` and a
    string ``"text"``.
    """

    def test_basic_two_speaker_script(self):
        """Blank-line-separated 'Speaker N:' paragraphs become one turn each."""
        script = (
            "Speaker 1: Hello there.\n\n"
            "Speaker 2: General Kenobi.\n\n"
            "Speaker 1: You are a bold one."
        )
        turns = parse_script_to_turns(script)
        self.assertEqual(len(turns), 3)
        self.assertEqual(turns[0], {"speaker": 1, "text": "Hello there."})
        self.assertEqual(turns[1], {"speaker": 2, "text": "General Kenobi."})
        self.assertEqual(turns[2], {"speaker": 1, "text": "You are a bold one."})

    def test_detects_inline_character_tag_as_new_speaker(self):
        """Regression: LLM embeds 'Mom:' inside Speaker 1's turn.

        Parser should split it out and assign Mom her own speaker number.
        """
        script = (
            "Speaker 1: We need magic, pure and simple. "
            "Mom: Hey kids! What's all this racket down here?\n\n"
            "Speaker 2: Oh hi Mom!"
        )
        turns = parse_script_to_turns(script)
        speakers = {t["speaker"] for t in turns}
        self.assertEqual(len(turns), 3)
        self.assertEqual(speakers, {1, 2, 3})
        self.assertEqual(turns[0]["text"], "We need magic, pure and simple.")
        self.assertIn("What's all this racket", turns[1]["text"])
        # Speakers 1 and 2 are claimed by explicit labels, so Mom is expected
        # to receive number 3 even though her line is the second turn.
        self.assertEqual(turns[1]["speaker"], 3)

    def test_named_characters_only(self):
        """Pure named-character script (no 'Speaker N:') should still parse."""
        script = (
            "Wizard: I'll cast Meteor Swarm.\n\n"
            "Orc: Bah! Swords are better.\n\n"
            "Mom: Dinner's ready!"
        )
        turns = parse_script_to_turns(script)
        self.assertEqual(len(turns), 3)
        # Speaker numbers are expected in order of first appearance.
        self.assertEqual(turns[0]["speaker"], 1)
        self.assertEqual(turns[1]["speaker"], 2)
        self.assertEqual(turns[2]["speaker"], 3)

    def test_same_character_keeps_same_speaker_number(self):
        """A returning character reuses the number from its first turn."""
        script = (
            "Wizard: First line.\n\n"
            "Orc: Second line.\n\n"
            "Wizard: Third line — wizard again."
        )
        turns = parse_script_to_turns(script)
        self.assertEqual(turns[0]["speaker"], turns[2]["speaker"])
        self.assertNotEqual(turns[0]["speaker"], turns[1]["speaker"])

    def test_caps_at_four_speakers(self):
        """No turn may carry a speaker number above 4, even for 'Speaker 5:'."""
        script = (
            "Speaker 1: One.\n\n"
            "Speaker 2: Two.\n\n"
            "Speaker 3: Three.\n\n"
            "Speaker 4: Four.\n\n"
            "Speaker 5: Five."
        )
        turns = parse_script_to_turns(script)
        max_speaker = max(t["speaker"] for t in turns)
        # The bound must be 4: with a 'Speaker 5:' label in the input, a bound
        # of 5 would pass even if the parser applied no cap at all.
        self.assertLessEqual(max_speaker, 4)

    def test_ignores_title_label(self):
        """A leading 'Title:' line yields no turn of its own."""
        script = "Title: My Great Script\n\nSpeaker 1: Hello."
        turns = parse_script_to_turns(script)
        self.assertEqual(len(turns), 1)
        self.assertEqual(turns[0]["speaker"], 1)

    def test_empty_script(self):
        """Empty or whitespace-only input parses to an empty list."""
        self.assertEqual(parse_script_to_turns(""), [])
        self.assertEqual(parse_script_to_turns(" \n\n "), [])

    def test_plain_text_becomes_speaker_1(self):
        """Unlabelled text is returned as a single turn for speaker 1."""
        turns = parse_script_to_turns("Just some monologue with no labels.")
        self.assertEqual(len(turns), 1)
        self.assertEqual(turns[0]["speaker"], 1)
|
|
|
|
class TestIntegration(unittest.TestCase):
    """End-to-end: dirty LLM output -> parsed and sanitized turns."""

    def test_wizard_orc_mom_scenario(self):
        """The exact failure case the user reported."""
        raw_script = (
            "Speaker 1: Oh come on, Orc, you're exaggerating. (laughs) "
            "We need magic, pure and simple. Mom: Hey there, you two! "
            "What's all this racket down here?\n\n"
            "Speaker 2: [sighs] Yeah, Mom, Wizard wants to use Wall of Force. "
            "Mom: Oh boy, you guys really are getting carried away."
        )

        # Parse, sanitize each turn's text, and drop turns left empty.
        turns = []
        for parsed in parse_script_to_turns(raw_script):
            cleaned = sanitize_dialogue(parsed["text"])
            if cleaned:
                turns.append({"speaker": parsed["speaker"], "text": cleaned})

        speakers = {t["speaker"] for t in turns}
        self.assertEqual(len(speakers), 3, f"Expected 3 speakers, got {speakers}: {turns}")

        # Neither stage directions nor the inline character tag may survive
        # anywhere in the spoken text.
        spoken = " ".join(t["text"] for t in turns)
        for leftover in ("[sighs]", "(laughs)", "Mom:"):
            self.assertNotIn(leftover, spoken)

    def test_round_trip_preserves_structure(self):
        """Rendering turns to a script and re-parsing it gives the same turns."""
        original_turns = [
            {"speaker": 1, "text": "First thing."},
            {"speaker": 2, "text": "Second thing."},
            {"speaker": 1, "text": "Back to me."},
        ]
        self.assertEqual(
            original_turns,
            parse_script_to_turns(turns_to_script(original_turns)),
        )
|
|
|
|
# Allow running this file directly (python tests/test_script_parser.py);
# verbosity=2 prints one result line per test.
if __name__ == "__main__":
    unittest.main(verbosity=2)
|
|