Spaces:

saumilyajj
/

driftcall

Paused

App Files Files Community

driftcall / tests /test_step_07_task_generator.py

saumilyajj

Upload folder using huggingface_hub

f2df60e verified about 1 month ago

raw

history blame contribute delete

35.4 kB

	"""Tests for cells/step_07_task_generator.py.

	Implements docs/tests/task_generator_tests.md:
	- 36 unit tests (U1–U30, U34–U39)
	- 6 property tests (P1–P6; P1 and P4–P6 are hypothesis-driven, P2 and P3 are fixed-seed scans)
	- 5 integration tests (I1–I5)
	"""

	from __future__ import annotations

	import dataclasses
	import hashlib
	import re
	import unicodedata
	from collections import Counter
	from math import sqrt
	from typing import TYPE_CHECKING, Any

	import pytest
	import yaml
	from hypothesis import given, settings
	from hypothesis import strategies as st

	from cells import step_07_task_generator as tg
	from cells.step_07_task_generator import (
	InvalidBudgetError,
	InvalidLanguageError,
	InvalidLanguageWeightError,
	InvalidStageError,
	MissingSlotError,
	NoVariantForLanguageError,
	SlotDistribution,
	Template,
	TemplateFileMissingError,
	TemplateLibrary,
	TemplateSchemaError,
	UnicodeNormalizationError,
	enumerate_variants,
	generate,
	load_templates,
	stable_sub_seed,
	)

	if TYPE_CHECKING:
	from pathlib import Path

	from cells.step_04_models import GoalSpec

	# ---------------------------------------------------------------------------
	# Shared fixtures / weight constants (§5.3 of the test plan)
	# ---------------------------------------------------------------------------

	# Per-stage language weights shared by the tests below. Each mapping sums to 1.0.
	# Stage 1 puts zero weight on Tamil and Kannada.
	STAGE_1_WEIGHTS: dict[str, float] = dict(
	    en=0.50,
	    hi=0.30,
	    hinglish=0.20,
	    ta=0.00,
	    kn=0.00,
	)
	# Stage 2 gives every supported language a non-zero weight.
	STAGE_2_WEIGHTS: dict[str, float] = dict(
	    en=0.30,
	    hi=0.30,
	    hinglish=0.20,
	    ta=0.10,
	    kn=0.10,
	)
	# Stage 3 currently uses the same values as stage 2, kept as its own dict object.
	STAGE_3_WEIGHTS: dict[str, float] = dict(
	    en=0.30,
	    hi=0.30,
	    hinglish=0.20,
	    ta=0.10,
	    kn=0.10,
	)


	@pytest.fixture(autouse=True)
	def _install_test_library(tmp_path_factory: pytest.TempPathFactory) -> Any:
	    """Make a freshly loaded fixture library the active override for every test.

	    Setup clears any existing override and the library cache, writes the
	    fixture YAML pair into a new temporary directory, loads it and installs
	    the result with ``tg.set_library_override``. Teardown clears the override
	    and the cache again.

	    This fixture only provides the default. A test that wants the production
	    ``data/task_briefs/templates.yaml`` or a custom library must call
	    ``tg.set_library_override()`` itself inside the test body.
	    """

	    def _clear_library_state() -> None:
	        tg.set_library_override(None)
	        tg.reset_library_cache()

	    _clear_library_state()
	    library_root = tmp_path_factory.mktemp("task_gen_fixture")
	    _write_fixture_library(library_root)
	    tg.set_library_override(load_templates(library_root / "templates.yaml"))
	    yield
	    _clear_library_state()


	# ---------------------------------------------------------------------------
	# §1.1 Determinism (U1–U5)
	# ---------------------------------------------------------------------------


	@pytest.mark.unit
	class TestDeterminism:
	    """U1–U5: repeated ``generate`` calls agree and the result is a frozen dataclass."""

	    def test_generate_same_seed_same_goalspec(self) -> None:  # U1
	        baseline = generate(42, 1, STAGE_1_WEIGHTS)
	        for _ in range(99):
	            repeat = generate(42, 1, STAGE_1_WEIGHTS)
	            assert repeat == baseline

	    def test_generate_byte_identical_seed_utterance_after_nfc(self) -> None:  # U2
	        baseline_bytes = generate(42, 1, STAGE_1_WEIGHTS).seed_utterance.encode("utf-8")
	        for _ in range(99):
	            repeat_bytes = generate(42, 1, STAGE_1_WEIGHTS).seed_utterance.encode("utf-8")
	            assert repeat_bytes == baseline_bytes

	    def test_generate_different_seeds_different_episodes(self) -> None:  # U3
	        specs = [generate(seed, 3, STAGE_3_WEIGHTS) for seed in range(100)]
	        distinct_utterances = {spec.seed_utterance for spec in specs}
	        # More than 90 of the 100 seeds must yield a distinct utterance.
	        assert len(distinct_utterances) > 90

	    def test_generate_stage_changes_template_pool(self) -> None:  # U4
	        # Same seed and weights, only the stage differs; each result must
	        # respect its own stage's constraint-count ceiling.
	        at_stage_1 = generate(42, 1, STAGE_3_WEIGHTS)
	        at_stage_3 = generate(42, 3, STAGE_3_WEIGHTS)
	        assert len(at_stage_1.constraints) <= 2
	        assert len(at_stage_3.constraints) <= 4

	    def test_generate_returns_frozen_goalspec(self) -> None:  # U5
	        spec = generate(42, 1, STAGE_1_WEIGHTS)
	        assert dataclasses.is_dataclass(spec)
	        assert spec.__dataclass_params__.frozen is True  # type: ignore[attr-defined]


	# ---------------------------------------------------------------------------
	# §1.2 Stage-aware constraint counts (U6–U8)
	# ---------------------------------------------------------------------------


	@pytest.mark.unit
	class TestStageConstraintCounts:
	    """U6–U8: the number of constraints never exceeds ``stage + 1`` over 200 seeds."""

	    def test_stage_1_constraint_count_leq_2(self) -> None:  # U6
	        for seed in range(200):
	            constraints = generate(seed, 1, STAGE_1_WEIGHTS).constraints
	            assert len(constraints) <= 2, (seed, constraints)

	    def test_stage_2_constraint_count_leq_3(self) -> None:  # U7
	        for seed in range(200):
	            constraints = generate(seed, 2, STAGE_2_WEIGHTS).constraints
	            assert len(constraints) <= 3, (seed, constraints)

	    def test_stage_3_constraint_count_leq_4(self) -> None:  # U8
	        for seed in range(200):
	            constraints = generate(seed, 3, STAGE_3_WEIGHTS).constraints
	            assert len(constraints) <= 4, (seed, constraints)


	# ---------------------------------------------------------------------------
	# §1.3 Language-weight distribution (U9, U10)
	# ---------------------------------------------------------------------------


	@pytest.mark.unit
	class TestLanguageWeightDistribution:
	    """U9–U10: sampled languages follow the requested weights."""

	    def test_language_weights_sampled_distribution_matches_at_n1000(self) -> None:  # U9
	        weights = {"en": 0.3, "hi": 0.3, "ta": 0.2, "kn": 0.1, "hinglish": 0.1}
	        n = 1000
	        tally = Counter(generate(seed, 3, weights).language for seed in range(n))
	        for lang, p in weights.items():
	            observed = tally.get(lang, 0) / n
	            # Three binomial standard deviations (plus a small epsilon): wide
	            # enough to avoid flakiness, tight enough to catch a broken sampler.
	            sigma = sqrt(p * (1 - p) / n)
	            tolerance = 3 * sigma + 1e-6
	            assert abs(observed - p) < tolerance, (lang, observed, p)

	    def test_language_weights_zero_keys_never_drawn(self) -> None:  # U10
	        weights = {"en": 1.0, "hi": 0.0, "ta": 0.0, "kn": 0.0, "hinglish": 0.0}
	        for seed in range(500):
	            drawn = generate(seed, 3, weights).language
	            assert drawn == "en"


	# ---------------------------------------------------------------------------
	# §1.4 Validation exceptions (U11–U19)
	# ---------------------------------------------------------------------------


	@pytest.mark.unit
	class TestValidationExceptions:
	    """U11–U19: invalid arguments and missing files raise the dedicated error types."""

	    def test_invalid_language_error_on_unsupported_key(self) -> None:  # U11
	        with pytest.raises(InvalidLanguageError):
	            generate(0, 1, {"hindi": 1.0})  # type: ignore[dict-item]

	    def test_invalid_language_error_on_marathi_key(self) -> None:  # U12
	        # The error message must name the offending key.
	        with pytest.raises(InvalidLanguageError, match="marathi"):
	            generate(0, 1, {"en": 0.5, "marathi": 0.5})  # type: ignore[dict-item]

	    def test_invalid_language_weight_error_empty_dict(self) -> None:  # U13
	        no_weights: dict[str, float] = {}
	        with pytest.raises(InvalidLanguageWeightError):
	            generate(0, 1, no_weights)

	    def test_invalid_language_weight_error_negative_value(self) -> None:  # U14
	        # The values sum to 1.0, so only the negative entry makes this invalid.
	        with_negative = {"en": 1.5, "hi": -0.5}
	        with pytest.raises(InvalidLanguageWeightError):
	            generate(0, 1, with_negative)

	    def test_invalid_language_weight_error_sum_mismatch_low(self) -> None:  # U15
	        sums_to_0_8 = {"en": 0.5, "hi": 0.3}
	        with pytest.raises(InvalidLanguageWeightError):
	            generate(0, 1, sums_to_0_8)

	    def test_invalid_language_weight_error_sum_mismatch_high(self) -> None:  # U16
	        sums_to_1_2 = {"en": 0.7, "hi": 0.5}
	        with pytest.raises(InvalidLanguageWeightError):
	            generate(0, 1, sums_to_1_2)

	    def test_invalid_language_weight_error_all_zero(self) -> None:  # U17
	        # The all-zero mapping is handed straight to the private validator
	        # instead of going through generate().
	        # NOTE(review): these weights sum to 0, so the validator may reject
	        # them in its sum check rather than in a dedicated all-zero branch.
	        # Confirm which branch fires if coverage of the defensive check matters.
	        all_zero = {"en": 0.0, "hi": 0.0, "ta": 0.0, "kn": 0.0, "hinglish": 0.0}
	        with pytest.raises(InvalidLanguageWeightError):
	            tg._validate_language_weights(all_zero)

	    @pytest.mark.parametrize("bad_stage", [0, 4, -1])
	    def test_invalid_stage_error(self, bad_stage: int) -> None:  # U18
	        with pytest.raises(InvalidStageError):
	            generate(0, bad_stage, STAGE_1_WEIGHTS)  # type: ignore[arg-type]

	    def test_template_file_missing_error(self, tmp_path: Path) -> None:  # U19
	        absent = tmp_path / "does_not_exist.yaml"
	        with pytest.raises(TemplateFileMissingError):
	            load_templates(absent)


	# ---------------------------------------------------------------------------
	# §1.5 Unicode NFC (U20–U24)
	# ---------------------------------------------------------------------------


	def _single_lang_weights(code: str) -> dict[str, float]:
	return {"en": 0.0, "hi": 0.0, "ta": 0.0, "kn": 0.0, "hinglish": 0.0} \| {code: 1.0}


	@pytest.mark.unit
	class TestNFC:
	    """U20–U24: strings returned by the generator and the loader are NFC-normalized."""

	    def test_seed_utterance_is_nfc_for_every_language(self) -> None:  # U20
	        for code in ("hi", "ta", "kn", "en", "hinglish"):
	            g = generate(7, 2, _single_lang_weights(code))
	            assert unicodedata.is_normalized("NFC", g.seed_utterance)

	    def test_slotgrid_string_values_are_nfc(self) -> None:  # U21
	        weights = {"en": 0.3, "hi": 0.3, "ta": 0.2, "kn": 0.1, "hinglish": 0.1}
	        for s in range(50):
	            g = generate(s, 3, weights)
	            # Only string-valued slots can be checked for normalization.
	            for v in g.slots.values():
	                if isinstance(v, str):
	                    assert unicodedata.is_normalized("NFC", v), (s, v)

	    def test_i18n_yaml_loaded_values_are_nfc(self, tmp_path: Path) -> None:  # U22
	        _write_fixture_library(tmp_path)
	        lib = load_templates(tmp_path / "templates.yaml")
	        for _lang, block in lib.i18n.items():
	            for v in block.values():
	                assert unicodedata.is_normalized("NFC", v)

	    def test_templates_yaml_variant_strings_are_nfc_post_load(
	        self, tmp_path: Path
	    ) -> None:  # U23
	        _write_fixture_library(tmp_path)
	        lib = load_templates(tmp_path / "templates.yaml")
	        for t in lib.templates:
	            for variants in t.language_variants.values():
	                for v in variants:
	                    assert unicodedata.is_normalized("NFC", v)

	    def test_nfd_input_renormalized_to_nfc_on_load(self, tmp_path: Path) -> None:  # U24
	        _write_fixture_library(tmp_path)
	        # Neither spelling of Bengaluru used below contains a code point with a
	        # canonical decomposition, so their NFD and NFC forms are identical and
	        # on their own they never give the loader any non-NFC input.
	        nfd_kannada = unicodedata.normalize("NFD", "ಬೆಂಗಳೂರು")
	        # The Kannada word for Monday contains the vowel sign OO (U+0CCB), which
	        # does decompose, so its NFD form is genuinely not NFC. It is placed under
	        # the same "weekdays"/"monday" key the fixture library already uses.
	        nfd_kannada_monday = unicodedata.normalize("NFD", "ಸೋಮವಾರ")
	        # Real precondition: fail loudly if the input is not actually decomposed.
	        assert not unicodedata.is_normalized("NFC", nfd_kannada_monday)
	        yaml_path = tmp_path / "i18n.yaml"
	        data = {
	            "hi": {"cities": {"BLR": unicodedata.normalize("NFD", "बेंगलुरु")}},
	            "ta": {"cities": {"BLR": "பெங்களூரு"}},
	            "kn": {
	                "cities": {"BLR": nfd_kannada},
	                "weekdays": {"monday": nfd_kannada_monday},
	            },
	            "en": {"cities": {"BLR": "Bengaluru"}},
	            "hinglish": {"cities": {"BLR": "Bengaluru"}},
	        }
	        # Overwrite the i18n.yaml written by the fixture helper with the NFD data.
	        yaml_path.write_text(yaml.safe_dump(data, allow_unicode=True), encoding="utf-8")
	        lib = load_templates(tmp_path / "templates.yaml")
	        for _lang, block in lib.i18n.items():
	            for v in block.values():
	                assert unicodedata.is_normalized("NFC", v)


	# ---------------------------------------------------------------------------
	# §1.6 stable_sub_seed domain separation (U25–U28)
	# ---------------------------------------------------------------------------


	@pytest.mark.unit
	class TestSubSeed:
	    """U25–U28: ``stable_sub_seed`` matches its reference formula and separates tags."""

	    def test_stable_sub_seed_formula(self) -> None:  # U25
	        # Reference value: the 8-byte BLAKE2b digest of b"42:domain",
	        # read as a big-endian integer.
	        digest = hashlib.blake2b(b"42:domain", digest_size=8).digest()
	        reference = int.from_bytes(digest, "big")
	        assert stable_sub_seed(42, "domain") == reference

	    def test_sub_seed_tags_differ_per_decision(self) -> None:  # U26
	        tags = ["domain", "template", "slots", "language", "variant"]
	        sub_seeds = {stable_sub_seed(42, tag) for tag in tags}
	        assert len(sub_seeds) == 5

	    def test_sub_seed_stable_across_runs(self) -> None:  # U27
	        first = stable_sub_seed(42, "domain")
	        second = stable_sub_seed(42, "domain")
	        assert first == second

	    def test_sub_seed_different_seed_different_output(self) -> None:  # U28
	        from_42 = stable_sub_seed(42, "domain")
	        from_43 = stable_sub_seed(43, "domain")
	        assert from_42 != from_43


	# ---------------------------------------------------------------------------
	# §1.7 Structural invariants (U29, U30)
	# ---------------------------------------------------------------------------


	@pytest.mark.unit
	class TestStructuralInvariants:
	    """U29–U30: rendered utterances are fully substituted and at most 280 characters."""

	    def test_seed_utterance_has_no_unresolved_placeholders(self) -> None:  # U29
	        weights = {"en": 0.3, "hi": 0.3, "ta": 0.2, "kn": 0.1, "hinglish": 0.1}
	        for seed in range(100):
	            utterance = generate(seed, 3, weights).seed_utterance
	            # A leftover "{name}" means a slot was never filled in.
	            leftover = re.search(r"\{[a-z_][a-z0-9_]*\}", utterance)
	            assert leftover is None, (
	                seed,
	                utterance,
	            )

	    def test_seed_utterance_length_leq_280(self) -> None:  # U30
	        weights = {"en": 0.3, "hi": 0.3, "ta": 0.2, "kn": 0.1, "hinglish": 0.1}
	        for seed in range(100):
	            utterance = generate(seed, 3, weights).seed_utterance
	            assert len(utterance) <= 280


	# ---------------------------------------------------------------------------
	# §1.8 Malformed-fixture raise-site tests (U34–U39)
	# ---------------------------------------------------------------------------


	@pytest.mark.unit
	class TestErrorModes:
	    """U34–U39: malformed libraries and distributions raise the expected error types."""

	    def test_missing_slot_error(self) -> None:  # U34
	        # Every variant references a placeholder no slot declares. The library
	        # is injected directly rather than loaded, so it bypasses load_templates
	        # (which, per the original note, would reject it in a static scan).
	        dangling = "go to {destination}"
	        template = Template(
	            template_id="airline.bad",
	            domain="airline",
	            intent="book_flight",
	            min_stage=1,
	            required_slots=("from", "to", "when"),
	            optional_slots=(),
	            slot_distributions={
	                "from": SlotDistribution(kind="choices", choices=("HYD",)),
	                "to": SlotDistribution(kind="choices", choices=("BLR",)),
	                "when": SlotDistribution(kind="date"),
	            },
	            constraints_template={},
	            drift_slot_tags=(),
	            language_variants={
	                lang: (dangling,) for lang in ("en", "hi", "ta", "kn", "hinglish")
	            },
	        )
	        library = TemplateLibrary(
	            templates=(template,),
	            cities_by_domain={"airline": ("HYD", "BLR")},
	            i18n={lang: {} for lang in ("hi", "ta", "kn", "en", "hinglish")},
	        )
	        tg.set_library_override(library)
	        english_only = {"en": 1.0, "hi": 0.0, "ta": 0.0, "kn": 0.0, "hinglish": 0.0}
	        with pytest.raises(MissingSlotError, match="destination"):
	            generate(0, 1, english_only)

	    def test_invalid_budget_error_from_step_misalignment(self) -> None:  # U35
	        # Pair a distribution whose step does not divide its range with an RNG
	        # whose randint always returns an index that overshoots ``high``.
	        import random

	        corrupt = SlotDistribution(kind="uniform", low=100.0, high=250.0, step=70.0)

	        class _OvershootRng(random.Random):
	            def randint(self, a: int, b: int) -> int:  # noqa: ARG002
	                return 3  # 100 + 3*70 = 310 > 250

	        rng = _OvershootRng(0)
	        with pytest.raises(InvalidBudgetError):
	            tg._sample_slot_value(rng, "budget_inr", corrupt, template_id="x")

	    def test_template_schema_error_missing_required_key(self, tmp_path: Path) -> None:  # U36
	        # A template entry that carries nothing but its id.
	        target = tmp_path / "templates.yaml"
	        target.write_text(yaml.safe_dump([{"template_id": "x"}]), encoding="utf-8")
	        with pytest.raises(TemplateSchemaError):
	            load_templates(target)

	    def test_template_schema_error_bad_step_grid(self, tmp_path: Path) -> None:  # U37
	        # high - low is 12000, which is not a multiple of the 700 step.
	        off_grid_budget = {"distribution": "uniform", "low": 3000, "high": 15000, "step": 700}
	        bad_template: dict[str, Any] = {
	            "template_id": "airline.bad",
	            "domain": "airline",
	            "intent": "book_flight",
	            "min_stage": 1,
	            "required_slots": [],
	            "optional_slots": [],
	            "constraints_template": {"budget_inr": off_grid_budget},
	            "drift_slot_tags": [],
	            "language_variants": {
	                "en": ["hello"],
	                "hi": ["नमस्ते"],
	                "ta": ["வணக்கம்"],
	                "kn": ["ನಮಸ್ಕಾರ"],
	                "hinglish": ["namaste"],
	            },
	        }
	        target = tmp_path / "templates.yaml"
	        target.write_text(
	            yaml.safe_dump([bad_template], allow_unicode=True), encoding="utf-8"
	        )
	        with pytest.raises(TemplateSchemaError, match="misaligned"):
	            load_templates(target)

	    def test_unicode_normalization_error_defensive(self, monkeypatch: pytest.MonkeyPatch) -> None:  # U38
	        from cells import step_07_task_generator as mod

	        # Make is_normalized report "not normalized" for any input so the
	        # defensive check in _assert_nfc is forced to fire; monkeypatch
	        # restores the real function after the test.
	        monkeypatch.setattr(mod.unicodedata, "is_normalized", lambda _form, *_rest: False)
	        with pytest.raises(UnicodeNormalizationError):
	            mod._assert_nfc("anything", where="test")

	    def test_no_variant_for_language_error(self) -> None:  # U39
	        # Tamil has an empty variant tuple. The library is injected directly,
	        # bypassing the loader.
	        template = Template(
	            template_id="airline.missing_ta",
	            domain="airline",
	            intent="book_flight",
	            min_stage=1,
	            required_slots=("from", "to", "when"),
	            optional_slots=(),
	            slot_distributions={
	                "from": SlotDistribution(kind="choices", choices=("HYD",)),
	                "to": SlotDistribution(kind="choices", choices=("BLR",)),
	                "when": SlotDistribution(kind="date"),
	            },
	            constraints_template={},
	            drift_slot_tags=(),
	            language_variants={
	                "en": ("from {from} to {to} on {when}",),
	                "hi": ("{from} से {to} {when}",),
	                "ta": (),  # intentionally empty
	                "kn": ("{from} {to} {when}",),
	                "hinglish": ("{from} to {to} {when}",),
	            },
	        )
	        library = TemplateLibrary(
	            templates=(template,),
	            cities_by_domain={"airline": ("HYD", "BLR")},
	            i18n={lang: {} for lang in ("hi", "ta", "kn", "en", "hinglish")},
	        )
	        tg.set_library_override(library)
	        tamil_only = {"en": 0.0, "hi": 0.0, "ta": 1.0, "kn": 0.0, "hinglish": 0.0}
	        with pytest.raises(NoVariantForLanguageError):
	            generate(0, 1, tamil_only)


	# ---------------------------------------------------------------------------
	# §2 Property tests (P1–P6)
	# ---------------------------------------------------------------------------


	def _language_weights_strategy() -> st.SearchStrategy[dict[str, float]]:
	    """Build a hypothesis strategy for normalized weights over the five languages.

	    One float in [0.01, 1.0] is drawn per language and each is divided by
	    the total of the draws, so every language gets a strictly positive weight.
	    """
	    codes = ("hi", "ta", "kn", "en", "hinglish")

	    @st.composite
	    def _normalized(draw: st.DrawFn) -> dict[str, float]:
	        samples = []
	        for _ in codes:
	            samples.append(
	                draw(st.floats(min_value=0.01, max_value=1.0, allow_nan=False, allow_infinity=False))
	            )
	        norm = sum(samples)
	        return {code: sample / norm for code, sample in zip(codes, samples, strict=True)}

	    return _normalized()


	@pytest.mark.property
	@given(
	    seed=st.integers(min_value=0, max_value=2**62),
	    stage=st.sampled_from([1, 2, 3]),
	    weights=_language_weights_strategy(),
	)
	@settings(max_examples=150, deadline=None)
	def test_generate_is_pure(seed: int, stage: int, weights: dict[str, float]) -> None:  # P1
	    """Two calls with identical arguments return equal specs and equal utterances."""
	    first = generate(seed, stage, weights)  # type: ignore[arg-type]
	    second = generate(seed, stage, weights)  # type: ignore[arg-type]
	    assert first == second
	    assert first.seed_utterance == second.seed_utterance


	@pytest.mark.property
	@pytest.mark.slow
	def test_procedural_space_uniqueness_scan() -> None:  # P2 (scaled down — slow)
	    """Scan 5,000 consecutive seeds at stage 3 and require mostly distinct utterances."""
	    weights = {"en": 0.2, "hi": 0.2, "ta": 0.2, "kn": 0.2, "hinglish": 0.2}
	    # Scaled-down scan; per the original note, the 200k-seed version is gated
	    # behind -m slow in the nightly CI run.
	    distinct = {generate(seed, 3, weights).seed_utterance for seed in range(5_000)}
	    # At least 80% of the 5,000 utterances must be unique.
	    assert len(distinct) >= 5_000 * 0.8


	@pytest.mark.property
	def test_language_distribution_chi_square_n10000() -> None:  # P3
	    """Chi-square goodness-of-fit of sampled languages against the weights, n=10,000."""
	    weights = {"en": 0.3, "hi": 0.3, "ta": 0.2, "kn": 0.1, "hinglish": 0.1}
	    n = 10_000
	    tally = Counter(generate(seed, 3, weights).language for seed in range(n))
	    # Pearson statistic: sum over languages of (observed - expected)^2 / expected,
	    # where the expected count is p * n.
	    chi2 = sum(
	        ((tally.get(lang, 0) - p * n) ** 2) / (p * n)
	        for lang, p in weights.items()
	    )
	    # df=4, alpha=0.001 critical value ≈ 18.47
	    assert chi2 < 18.47, f"chi-square {chi2:.2f} rejects null"


	@pytest.mark.property
	@given(seed=st.integers(min_value=0, max_value=10_000))
	@settings(max_examples=100, deadline=None)
	def test_stage_template_pool_monotone(seed: int) -> None:  # P4
	    """For any seed, an English-only stage-1 spec carries at most two constraints."""
	    english_only = {"en": 1.0, "hi": 0.0, "ta": 0.0, "kn": 0.0, "hinglish": 0.0}
	    stage_1_spec = generate(seed, 1, english_only)
	    # NOTE(review): despite the name, only the stage-1 ceiling is checked here;
	    # no comparison across stages is made.
	    assert len(stage_1_spec.constraints) <= 2


	@pytest.mark.property
	@given(
	    seed=st.integers(min_value=0, max_value=2**62),
	    stage=st.sampled_from([1, 2, 3]),
	    weights=_language_weights_strategy(),
	)
	@settings(max_examples=300, deadline=None)
	def test_seed_utterance_always_nfc(
	    seed: int, stage: int, weights: dict[str, float]
	) -> None:  # P5
	    """The utterance and every string-valued slot are NFC for arbitrary inputs."""
	    spec = generate(seed, stage, weights)  # type: ignore[arg-type]
	    assert unicodedata.is_normalized("NFC", spec.seed_utterance)
	    for slot_value in spec.slots.values():
	        # Non-string slot values have no normalization form to check.
	        if not isinstance(slot_value, str):
	            continue
	        assert unicodedata.is_normalized("NFC", slot_value)


	@pytest.mark.property
	@given(
	    seed=st.integers(min_value=0, max_value=10_000),
	    stage=st.sampled_from([1, 2, 3]),
	)
	@settings(max_examples=200, deadline=None)
	def test_budget_within_declared_range(seed: int, stage: int) -> None:  # P6
	    """A sampled ``budget_inr`` lies inside some template's declared [low, high] range."""
	    english_only = {"en": 1.0, "hi": 0.0, "ta": 0.0, "kn": 0.0, "hinglish": 0.0}
	    spec = generate(seed, stage, english_only)  # type: ignore[arg-type]
	    if "budget_inr" not in spec.constraints:
	        return
	    # Accept the budget if any template in the active library declares a range
	    # that contains it; the search is not restricted to the spec's own domain.
	    covered = False
	    for template in tg._get_library().templates:
	        if "budget_inr" not in template.constraints_template:
	            continue
	        declared = template.constraints_template["budget_inr"]
	        assert declared.low is not None and declared.high is not None
	        if declared.low <= spec.constraints["budget_inr"] <= declared.high:
	            covered = True
	            break
	    assert covered, (spec.constraints, spec.domain)


	# ---------------------------------------------------------------------------
	# §3 Integration tests (I1–I5) — use real fixture files written on disk
	# ---------------------------------------------------------------------------


	def _write_fixture_library(tmp_path: Path) -> None:
	    """Author a minimal real templates.yaml + i18n.yaml pair.

	    Writes two UTF-8 YAML files into ``tmp_path``:

	    * ``templates.yaml`` — five templates across four domains (airline, cab,
	      restaurant, hotel). Four have ``min_stage`` 1; the second airline
	      template (``airline.book.compound_v1``) has ``min_stage`` 3 and three
	      constraints. Every template has one variant per language.
	    * ``i18n.yaml`` — per-language city names (plus a ``monday`` weekday entry
	      for hi/ta/kn).

	    Both files are dumped with ``allow_unicode=True`` and ``sort_keys=False``,
	    so non-ASCII text is written literally and key order follows the literals
	    below.
	    """
	    templates: list[dict[str, Any]] = [
	        # Airline, stage 1: budget plus time-window constraints.
	        {
	            "template_id": "airline.book.fixture_v1",
	            "domain": "airline",
	            "intent": "book_flight",
	            "min_stage": 1,
	            "required_slots": ["from", "to", "when"],
	            "optional_slots": [],
	            "slot_distributions": {
	                "from": {"choices": ["HYD", "BLR", "DEL", "BOM", "MAA"]},
	                "to": {"choices": ["HYD", "BLR", "DEL", "BOM", "MAA"]},
	                "when": {"distribution": "date"},
	            },
	            "constraints_template": {
	                "budget_inr": {
	                    "distribution": "uniform",
	                    "low": 3000,
	                    "high": 15000,
	                    "step": 500,
	                },
	                "time_window": {
	                    "choices": ["morning", "afternoon", "evening", "late_night"]
	                },
	            },
	            "drift_slot_tags": ["price", "total_fare_inr"],
	            "language_variants": {
	                "hinglish": [
	                    "Bhai {when} ko {from} se {to}, {budget_inr} rupees max, {time_window}"
	                ],
	                "hi": [
	                    "{when} को {from} से {to}, ₹{budget_inr} से कम, {time_window}"
	                ],
	                "ta": [
	                    "{when} அன்று {from} லிருந்து {to}, ₹{budget_inr} கீழ், {time_window}"
	                ],
	                "kn": [
	                    "{when} ರಂದು {from} ಇಂದ {to}, ₹{budget_inr} ಒಳಗೆ, {time_window}"
	                ],
	                "en": [
	                    "Flight from {from} to {to} on {when}, under ₹{budget_inr}, {time_window}"
	                ],
	            },
	        },
	        # Cab, stage 1: budget constraint only.
	        {
	            "template_id": "cab.ride.fixture_v1",
	            "domain": "cab",
	            "intent": "book_cab",
	            "min_stage": 1,
	            "required_slots": ["pickup", "drop", "when"],
	            "optional_slots": [],
	            "slot_distributions": {
	                "pickup": {"choices": ["Koramangala", "Indiranagar", "Whitefield"]},
	                "drop": {"choices": ["Koramangala", "Indiranagar", "Whitefield"]},
	                "when": {"distribution": "date"},
	            },
	            "constraints_template": {
	                "budget_inr": {
	                    "distribution": "uniform",
	                    "low": 200,
	                    "high": 2000,
	                    "step": 50,
	                }
	            },
	            "drift_slot_tags": ["fare_inr"],
	            "language_variants": {
	                "hinglish": ["{when} ko {pickup} se {drop} cab, {budget_inr} ke andar"],
	                "hi": ["{when} को {pickup} से {drop}, ₹{budget_inr} के अंदर"],
	                "ta": ["{when} அன்று {pickup} லிருந்து {drop}, ₹{budget_inr} கீழ்"],
	                "kn": ["{when} ರಂದು {pickup} ಇಂದ {drop}, ₹{budget_inr} ಒಳಗೆ"],
	                "en": ["Cab {pickup} to {drop} on {when}, under ₹{budget_inr}"],
	            },
	        },
	        # Restaurant, stage 1: budget plus a boolean veg_only constraint.
	        {
	            "template_id": "restaurant.order.fixture_v1",
	            "domain": "restaurant",
	            "intent": "order_food",
	            "min_stage": 1,
	            "required_slots": ["city", "cuisine", "when"],
	            "optional_slots": [],
	            "slot_distributions": {
	                "city": {"choices": ["HYD", "BLR", "DEL"]},
	                "cuisine": {"choices": ["Biryani", "Dosa", "Pizza"]},
	                "when": {"distribution": "date"},
	            },
	            "constraints_template": {
	                "budget_inr": {
	                    "distribution": "uniform",
	                    "low": 200,
	                    "high": 1000,
	                    "step": 50,
	                },
	                "veg_only": {"distribution": "bool"},
	            },
	            "drift_slot_tags": ["min_order"],
	            "language_variants": {
	                "hinglish": [
	                    "{when} ko {city} mein {cuisine}, {budget_inr} max, veg={veg_only}"
	                ],
	                "hi": [
	                    "{when} को {city} में {cuisine}, ₹{budget_inr}, veg={veg_only}"
	                ],
	                "ta": [
	                    "{when} அன்று {city} இல் {cuisine}, ₹{budget_inr}, veg={veg_only}"
	                ],
	                "kn": [
	                    "{when} ರಂದು {city} ನಲ್ಲಿ {cuisine}, ₹{budget_inr}, veg={veg_only}"
	                ],
	                "en": [
	                    "Order {cuisine} in {city} on {when}, ₹{budget_inr}, veg={veg_only}"
	                ],
	            },
	        },
	        # Hotel, stage 1: budget constraint only.
	        {
	            "template_id": "hotel.book.fixture_v1",
	            "domain": "hotel",
	            "intent": "book_hotel",
	            "min_stage": 1,
	            "required_slots": ["city", "checkin", "checkout"],
	            "optional_slots": [],
	            "slot_distributions": {
	                "city": {"choices": ["HYD", "BLR", "GOI"]},
	                "checkin": {"distribution": "date"},
	                "checkout": {"distribution": "date"},
	            },
	            "constraints_template": {
	                "budget_inr": {
	                    "distribution": "uniform",
	                    "low": 2000,
	                    "high": 10000,
	                    "step": 500,
	                }
	            },
	            "drift_slot_tags": ["cancel_window"],
	            "language_variants": {
	                "hinglish": ["{city} {checkin}-{checkout}, ₹{budget_inr}/night"],
	                "hi": ["{city} {checkin}-{checkout}, ₹{budget_inr} प्रति रात"],
	                "ta": ["{city} {checkin}-{checkout}, ₹{budget_inr} இரவுக்கு"],
	                "kn": ["{city} {checkin}-{checkout}, ₹{budget_inr} ಒಂದು ರಾತ್ರಿ"],
	                "en": ["{city} {checkin} to {checkout}, ₹{budget_inr} per night"],
	            },
	        },
	        # Airline, stage 3 only: adds passenger_count as a third constraint.
	        {
	            "template_id": "airline.book.compound_v1",
	            "domain": "airline",
	            "intent": "book_flight",
	            "min_stage": 3,
	            "required_slots": ["from", "to", "when"],
	            "optional_slots": [],
	            "slot_distributions": {
	                "from": {"choices": ["HYD", "BLR", "DEL"]},
	                "to": {"choices": ["HYD", "BLR", "DEL"]},
	                "when": {"distribution": "date"},
	            },
	            "constraints_template": {
	                "budget_inr": {
	                    "distribution": "uniform",
	                    "low": 3000,
	                    "high": 15000,
	                    "step": 500,
	                },
	                "time_window": {
	                    "choices": ["morning", "afternoon", "evening", "late_night"]
	                },
	                "passenger_count": {
	                    "distribution": "uniform",
	                    "low": 1,
	                    "high": 4,
	                    "step": 1,
	                },
	            },
	            "drift_slot_tags": ["price", "passenger_count"],
	            "language_variants": {
	                "hinglish": [
	                    "{when} ko {from} se {to}, {passenger_count} log, ₹{budget_inr}, {time_window}"
	                ],
	                "hi": [
	                    "{when} को {from} से {to}, {passenger_count} लोग, ₹{budget_inr}, {time_window}"
	                ],
	                "ta": [
	                    "{when} அன்று {from} லிருந்து {to}, {passenger_count} பேர், ₹{budget_inr}, {time_window}"
	                ],
	                "kn": [
	                    "{when} ರಂದು {from} ಇಂದ {to}, {passenger_count} ಜನ, ₹{budget_inr}, {time_window}"
	                ],
	                "en": [
	                    "{from} to {to} on {when} for {passenger_count} pax, ₹{budget_inr}, {time_window}"
	                ],
	            },
	        },
	    ]
	    # sort_keys=False keeps the key order written above in the dumped file.
	    (tmp_path / "templates.yaml").write_text(
	        yaml.safe_dump(templates, allow_unicode=True, sort_keys=False), encoding="utf-8"
	    )
	    # Localized display names: hi/ta/kn carry cities and a weekday entry,
	    # en/hinglish carry cities only.
	    i18n: dict[str, Any] = {
	        "hi": {
	            "cities": {"BLR": "बेंगलुरु", "MAA": "चेन्नई", "HYD": "हैदराबाद"},
	            "weekdays": {"monday": "सोमवार"},
	        },
	        "ta": {
	            "cities": {"BLR": "பெங்களூரு", "MAA": "சென்னை"},
	            "weekdays": {"monday": "திங்கட்கிழமை"},
	        },
	        "kn": {
	            "cities": {"BLR": "ಬೆಂಗಳೂರು", "MAA": "ಚೆನ್ನೈ"},
	            "weekdays": {"monday": "ಸೋಮವಾರ"},
	        },
	        "en": {"cities": {"BLR": "Bengaluru"}},
	        "hinglish": {"cities": {"BLR": "Bengaluru"}},
	    }
	    (tmp_path / "i18n.yaml").write_text(
	        yaml.safe_dump(i18n, allow_unicode=True, sort_keys=False), encoding="utf-8"
	    )


	def _valid_goal_spec(g: GoalSpec) -> None:
	assert dataclasses.is_dataclass(g)
	assert g.domain in ("airline", "cab", "restaurant", "hotel")
	assert g.language in ("hi", "ta", "kn", "en", "hinglish")
	assert unicodedata.is_normalized("NFC", g.seed_utterance)
	assert len(g.seed_utterance) <= 280
	assert re.search(r"\{[a-z_][a-z0-9_]*\}", g.seed_utterance) is None


	@pytest.mark.integration
	class TestIntegration:
	    """I1–I5: end-to-end checks against fixture files written to disk."""

	    def test_load_templates_from_fixture(self, tmp_path: Path) -> None:  # I1
	        _write_fixture_library(tmp_path)
	        library = load_templates(tmp_path / "templates.yaml")
	        assert isinstance(library, TemplateLibrary)
	        # Five templates spread over four distinct domains.
	        domains = {template.domain for template in library.templates}
	        assert len(domains) == 4
	        assert len(library.templates) == 5
	        for lang in ("hi", "ta", "kn", "en", "hinglish"):
	            assert lang in library.i18n

	    def test_100_briefs_pass_goal_spec_invariants(self, tmp_path: Path) -> None:  # I2
	        _write_fixture_library(tmp_path)
	        tg.set_library_override(load_templates(tmp_path / "templates.yaml"))
	        weights = {"en": 0.3, "hi": 0.3, "ta": 0.2, "kn": 0.1, "hinglish": 0.1}
	        for seed in range(100):
	            _valid_goal_spec(generate(seed, 3, weights))

	    def test_enumerate_variants_stable_order(self, tmp_path: Path) -> None:  # I3
	        _write_fixture_library(tmp_path)
	        tg.set_library_override(load_templates(tmp_path / "templates.yaml"))
	        weights = {"en": 0.2, "hi": 0.2, "ta": 0.2, "kn": 0.2, "hinglish": 0.2}
	        # Two independent enumerations must yield the same utterances in the same order.
	        first_pass = list(enumerate_variants(limit=200, stage=3, language_weights=weights))
	        second_pass = list(enumerate_variants(limit=200, stage=3, language_weights=weights))
	        first_utterances = [spec.seed_utterance for spec in first_pass]
	        second_utterances = [spec.seed_utterance for spec in second_pass]
	        assert first_utterances == second_utterances

	    @pytest.mark.parametrize(
	        "lang,expected_block,forbidden_block",
	        [
	            ("hi", (0x0900, 0x097F), (0x0B80, 0x0BFF)),
	            ("ta", (0x0B80, 0x0BFF), (0x0900, 0x097F)),
	            ("kn", (0x0C80, 0x0CFF), (0x0900, 0x097F)),
	        ],
	    )
	    def test_indic_script_isolation(
	        self,
	        tmp_path: Path,
	        lang: str,
	        expected_block: tuple[int, int],
	        forbidden_block: tuple[int, int],
	    ) -> None:  # I4
	        _write_fixture_library(tmp_path)
	        tg.set_library_override(load_templates(tmp_path / "templates.yaml"))
	        weights = {
	            code: (1.0 if code == lang else 0.0)
	            for code in ("hi", "ta", "kn", "en", "hinglish")
	        }
	        want_lo, want_hi = expected_block
	        ban_lo, ban_hi = forbidden_block
	        for seed in range(50):
	            utterance = generate(seed, 2, weights).seed_utterance
	            # At least one code point from the language's own script block,
	            # and none from the forbidden block.
	            assert any(want_lo <= ord(ch) <= want_hi for ch in utterance), utterance
	            assert not any(ban_lo <= ord(ch) <= ban_hi for ch in utterance), utterance

	    def test_hinglish_never_contains_devanagari(self, tmp_path: Path) -> None:  # I5
	        _write_fixture_library(tmp_path)
	        tg.set_library_override(load_templates(tmp_path / "templates.yaml"))
	        weights = {"hinglish": 1.0, "hi": 0.0, "ta": 0.0, "kn": 0.0, "en": 0.0}
	        for seed in range(100):
	            utterance = generate(seed, 3, weights).seed_utterance
	            # U+0900–U+097F is the Devanagari block.
	            assert not any(0x0900 <= ord(ch) <= 0x097F for ch in utterance)