Spaces:

ACloudCenter
/

Conference-Generator-VibeVoice

Running

App Files Files Community

Conference-Generator-VibeVoice / tests /test_script_parser.py

ACloudCenter

Fix inline speaker splitting + add parser tests

b504537 28 days ago

raw

history blame contribute delete

7.57 kB

	"""Tests for script parsing & sanitization logic.

	These tests verify two things VibeVoice users care about:
	1. Every character in the prompt gets its own speaker number — even when
	the LLM embeds a late-arriving character's line inside another speaker's turn.
	2. Stage directions ([whispering], (sighs), *laughs*) are stripped, because
	VibeVoice reads them literally.

	Run:
	python -m pytest tests/
	# or:
	python tests/test_script_parser.py
	"""
	import os
	import sys
	import unittest

	# Allow `python tests/test_script_parser.py` from repo root: put the parent
	# of this file's directory at the front of sys.path so `import app` resolves.
	sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

	# Stub HF_TOKEN so importing app.py doesn't complain. setdefault leaves an
	# already-set token untouched.
	os.environ.setdefault("HF_TOKEN", "test-token-placeholder")

	# Import the functions under test with a plain import of app.py. This must
	# stay below the sys.path and HF_TOKEN setup above.
	# NOTE(review): the original note states that a Modal-connection failure is
	# caught inside app.py itself, so no live connection should be needed here —
	# confirm against app.py.
	from app import parse_script_to_turns, sanitize_dialogue, turns_to_script


	class TestSanitizeDialogue(unittest.TestCase):
	    """Checks for sanitize_dialogue: performance cues go, real dialogue stays."""

	    def test_strips_bracketed_stage_directions(self):
	        """Square-bracketed cues are removed at the start and mid-sentence."""
	        self.assertEqual(
	            sanitize_dialogue("[whispering] Come closer, my child."),
	            "Come closer, my child.",
	        )
	        # Mid-sentence removal is expected to leave a single space behind.
	        self.assertEqual(
	            sanitize_dialogue("Ugh [door slams] she's here."),
	            "Ugh she's here.",
	        )

	    def test_strips_asterisk_actions(self):
	        """Asterisk-wrapped actions such as *laughs* are removed."""
	        # The action must be wrapped in asterisks. A bare leading word is
	        # ordinary dialogue and is expected to survive (compare
	        # test_preserves_inline_emotion_words), so an input without the
	        # asterisks would contradict that test.
	        self.assertEqual(
	            sanitize_dialogue("*laughs* Oh man, that's wild!"),
	            "Oh man, that's wild!",
	        )

	    def test_strips_paren_emotion_cues(self):
	        """Short parenthesised emotion cues are removed at the start and mid-sentence."""
	        self.assertEqual(
	            sanitize_dialogue("(softly) Mom is coming!"),
	            "Mom is coming!",
	        )
	        self.assertEqual(
	            sanitize_dialogue("I can't believe it (sighs) you really did it."),
	            "I can't believe it you really did it.",
	        )

	    def test_preserves_legitimate_asides(self):
	        """A genuine parenthetical aside is returned unchanged."""
	        # Real parenthetical asides should NOT be stripped
	        self.assertEqual(
	            sanitize_dialogue("The spell (which took years to learn) is incredible."),
	            "The spell (which took years to learn) is incredible.",
	        )

	    def test_preserves_inline_emotion_words(self):
	        """Unmarked interjections are dialogue and are returned unchanged."""
	        # "Hahaha", "Ugh", "Whoa" — these are fine as real dialogue
	        self.assertEqual(
	            sanitize_dialogue("Hahaha you wish, Orc!"),
	            "Hahaha you wish, Orc!",
	        )


	class TestParseScriptToTurns(unittest.TestCase):
	    """Checks for parse_script_to_turns: labelled lines become numbered turns."""

	    def test_basic_two_speaker_script(self):
	        """Blank-line-separated 'Speaker N:' lines give one turn each, in order."""
	        # Built from explicit separators, like the other tests in this class,
	        # so the script text cannot pick up this file's own indentation.
	        script = (
	            "Speaker 1: Hello there.\n\n"
	            "Speaker 2: General Kenobi.\n\n"
	            "Speaker 1: You are a bold one."
	        )
	        turns = parse_script_to_turns(script)
	        self.assertEqual(len(turns), 3)
	        self.assertEqual(turns[0], {"speaker": 1, "text": "Hello there."})
	        self.assertEqual(turns[1], {"speaker": 2, "text": "General Kenobi."})
	        self.assertEqual(turns[2], {"speaker": 1, "text": "You are a bold one."})

	    def test_detects_inline_character_tag_as_new_speaker(self):
	        """Regression: LLM embeds 'Mom:' inside Speaker 1's turn.

	        Parser should split it out and assign Mom her own speaker number.
	        """
	        script = (
	            "Speaker 1: We need magic, pure and simple. "
	            "Mom: Hey kids! What's all this racket down here?\n\n"
	            "Speaker 2: Oh hi Mom!"
	        )
	        turns = parse_script_to_turns(script)
	        speakers = {t["speaker"] for t in turns}
	        self.assertEqual(len(turns), 3)
	        self.assertEqual(speakers, {1, 2, 3})  # Mom becomes Speaker 3
	        self.assertEqual(turns[0]["text"], "We need magic, pure and simple.")
	        # Mom's line is expected second, i.e. in script order.
	        self.assertIn("What's all this racket", turns[1]["text"])
	        self.assertEqual(turns[1]["speaker"], 3)

	    def test_named_characters_only(self):
	        """Pure named-character script (no 'Speaker N:') should still parse."""
	        script = (
	            "Wizard: I'll cast Meteor Swarm.\n\n"
	            "Orc: Bah! Swords are better.\n\n"
	            "Mom: Dinner's ready!"
	        )
	        turns = parse_script_to_turns(script)
	        self.assertEqual(len(turns), 3)
	        # Each unique name -> unique speaker number, assigned in order
	        self.assertEqual(turns[0]["speaker"], 1)  # Wizard
	        self.assertEqual(turns[1]["speaker"], 2)  # Orc
	        self.assertEqual(turns[2]["speaker"], 3)  # Mom

	    def test_same_character_keeps_same_speaker_number(self):
	        """A returning character reuses the number from their first line."""
	        script = (
	            "Wizard: First line.\n\n"
	            "Orc: Second line.\n\n"
	            "Wizard: Third line — wizard again."
	        )
	        turns = parse_script_to_turns(script)
	        self.assertEqual(turns[0]["speaker"], turns[2]["speaker"])
	        self.assertNotEqual(turns[0]["speaker"], turns[1]["speaker"])

	    def test_caps_at_four_speakers(self):
	        """A script with five 'Speaker N:' labels parses with no speaker above 5.

	        NOTE(review): despite the test name, this only bounds the highest
	        speaker number at 5, so an uncapped Speaker 5 passes. Confirm whether
	        the cap to four voices is applied by the parser or further downstream,
	        then tighten the bound or rename the test accordingly.
	        """
	        script = (
	            "Speaker 1: One.\n\n"
	            "Speaker 2: Two.\n\n"
	            "Speaker 3: Three.\n\n"
	            "Speaker 4: Four.\n\n"
	            "Speaker 5: Five."
	        )
	        turns = parse_script_to_turns(script)
	        max_speaker = max(t["speaker"] for t in turns)
	        self.assertLessEqual(max_speaker, 5)  # parser preserves Speaker N numbers

	    def test_ignores_title_label(self):
	        """A leading 'Title:' line does not produce a turn of its own."""
	        script = "Title: My Great Script\n\nSpeaker 1: Hello."
	        turns = parse_script_to_turns(script)
	        self.assertEqual(len(turns), 1)
	        self.assertEqual(turns[0]["speaker"], 1)

	    def test_empty_script(self):
	        """Empty and whitespace-only scripts parse to an empty list."""
	        self.assertEqual(parse_script_to_turns(""), [])
	        self.assertEqual(parse_script_to_turns(" \n\n "), [])

	    def test_plain_text_becomes_speaker_1(self):
	        """Unlabelled text becomes a single turn attributed to speaker 1."""
	        turns = parse_script_to_turns("Just some monologue with no labels.")
	        self.assertEqual(len(turns), 1)
	        self.assertEqual(turns[0]["speaker"], 1)


	class TestIntegration(unittest.TestCase):
	    """Pipeline checks: raw LLM text is parsed, then every turn is sanitized."""

	    def test_wizard_orc_mom_scenario(self):
	        """Replay the reported script where 'Mom:' lines hide inside other turns."""
	        dirty_script = "".join([
	            "Speaker 1: Oh come on, Orc, you're exaggerating. (laughs) ",
	            "We need magic, pure and simple. Mom: Hey there, you two! ",
	            "What's all this racket down here?\n\n",
	            "Speaker 2: [sighs] Yeah, Mom, Wizard wants to use Wall of Force. ",
	            "Mom: Oh boy, you guys really are getting carried away.",
	        ])

	        # Sanitize each parsed turn and keep only those with text left over.
	        turns = []
	        for parsed in parse_script_to_turns(dirty_script):
	            speaker = parsed["speaker"]
	            cleaned = sanitize_dialogue(parsed["text"])
	            if cleaned:
	                turns.append({"speaker": speaker, "text": cleaned})

	        speakers = {turn["speaker"] for turn in turns}
	        self.assertEqual(len(speakers), 3, f"Expected 3 speakers, got {speakers}: {turns}")

	        # Neither the stage directions nor the inline "Mom:" tag may remain
	        # in any turn's text.
	        all_text = " ".join([turn["text"] for turn in turns])
	        for leftover in ("[sighs]", "(laughs)", "Mom:"):
	            self.assertNotIn(leftover, all_text)

	    def test_round_trip_preserves_structure(self):
	        """Rendering turns to a script and parsing it back returns the same turns."""
	        original_turns = [
	            {"speaker": 1, "text": "First thing."},
	            {"speaker": 2, "text": "Second thing."},
	            {"speaker": 1, "text": "Back to me."},
	        ]
	        reparsed = parse_script_to_turns(turns_to_script(original_turns))
	        self.assertEqual(original_turns, reparsed)


	# Script entry point: when this file is executed directly, run the tests in
	# this module via unittest.main with verbosity=2.
	if __name__ == "__main__":
	unittest.main(verbosity=2)