"""Tests for cells/step_07_task_generator.py. Implements docs/tests/task_generator_tests.md: - 30 unit tests (U1–U30, U34–U39) - 6 hypothesis property tests (P1–P6) - 5 integration tests (I1–I5) """ from __future__ import annotations import dataclasses import hashlib import re import unicodedata from collections import Counter from math import sqrt from typing import TYPE_CHECKING, Any import pytest import yaml from hypothesis import given, settings from hypothesis import strategies as st from cells import step_07_task_generator as tg from cells.step_07_task_generator import ( InvalidBudgetError, InvalidLanguageError, InvalidLanguageWeightError, InvalidStageError, MissingSlotError, NoVariantForLanguageError, SlotDistribution, Template, TemplateFileMissingError, TemplateLibrary, TemplateSchemaError, UnicodeNormalizationError, enumerate_variants, generate, load_templates, stable_sub_seed, ) if TYPE_CHECKING: from pathlib import Path from cells.step_04_models import GoalSpec # --------------------------------------------------------------------------- # Shared fixtures / weight constants (§5.3 of the test plan) # --------------------------------------------------------------------------- STAGE_1_WEIGHTS: dict[str, float] = { "en": 0.50, "hi": 0.30, "hinglish": 0.20, "ta": 0.00, "kn": 0.00, } STAGE_2_WEIGHTS: dict[str, float] = { "en": 0.30, "hi": 0.30, "hinglish": 0.20, "ta": 0.10, "kn": 0.10, } STAGE_3_WEIGHTS: dict[str, float] = { "en": 0.30, "hi": 0.30, "hinglish": 0.20, "ta": 0.10, "kn": 0.10, } @pytest.fixture(autouse=True) def _install_test_library(tmp_path_factory: pytest.TempPathFactory) -> Any: """Install a fully-wired fixture library for every test. Tests that need the production ``data/task_briefs/templates.yaml`` or a custom library override must call ``tg.set_library_override()`` inside the test body — this fixture only sets the default. """ tg.set_library_override(None) tg.reset_library_cache() fixture_dir = tmp_path_factory.mktemp("task_gen_fixture") _write_fixture_library(fixture_dir) lib = load_templates(fixture_dir / "templates.yaml") tg.set_library_override(lib) yield tg.set_library_override(None) tg.reset_library_cache() # --------------------------------------------------------------------------- # §1.1 Determinism (U1–U5) # --------------------------------------------------------------------------- @pytest.mark.unit class TestDeterminism: def test_generate_same_seed_same_goalspec(self) -> None: # U1 first = generate(42, 1, STAGE_1_WEIGHTS) for _ in range(99): assert generate(42, 1, STAGE_1_WEIGHTS) == first def test_generate_byte_identical_seed_utterance_after_nfc(self) -> None: # U2 first_bytes = generate(42, 1, STAGE_1_WEIGHTS).seed_utterance.encode("utf-8") for _ in range(99): assert ( generate(42, 1, STAGE_1_WEIGHTS).seed_utterance.encode("utf-8") == first_bytes ) def test_generate_different_seeds_different_episodes(self) -> None: # U3 results = [generate(s, 3, STAGE_3_WEIGHTS) for s in range(100)] assert len({g.seed_utterance for g in results}) > 90 def test_generate_stage_changes_template_pool(self) -> None: # U4 g1 = generate(42, 1, STAGE_3_WEIGHTS) g3 = generate(42, 3, STAGE_3_WEIGHTS) assert len(g1.constraints) <= 2 assert len(g3.constraints) <= 4 def test_generate_returns_frozen_goalspec(self) -> None: # U5 g = generate(42, 1, STAGE_1_WEIGHTS) assert dataclasses.is_dataclass(g) assert g.__dataclass_params__.frozen is True # type: ignore[attr-defined] # --------------------------------------------------------------------------- # §1.2 Stage-aware constraint counts (U6–U8) # --------------------------------------------------------------------------- @pytest.mark.unit class TestStageConstraintCounts: def test_stage_1_constraint_count_leq_2(self) -> None: # U6 for s in range(200): g = generate(s, 1, STAGE_1_WEIGHTS) assert len(g.constraints) <= 2, (s, g.constraints) def test_stage_2_constraint_count_leq_3(self) -> None: # U7 for s in range(200): g = generate(s, 2, STAGE_2_WEIGHTS) assert len(g.constraints) <= 3, (s, g.constraints) def test_stage_3_constraint_count_leq_4(self) -> None: # U8 for s in range(200): g = generate(s, 3, STAGE_3_WEIGHTS) assert len(g.constraints) <= 4, (s, g.constraints) # --------------------------------------------------------------------------- # §1.3 Language-weight distribution (U9, U10) # --------------------------------------------------------------------------- @pytest.mark.unit class TestLanguageWeightDistribution: def test_language_weights_sampled_distribution_matches_at_n1000(self) -> None: # U9 weights = {"en": 0.3, "hi": 0.3, "ta": 0.2, "kn": 0.1, "hinglish": 0.1} n = 1000 counts = Counter( generate(s, 3, weights).language for s in range(n) ) for lang, p in weights.items(): observed = counts.get(lang, 0) / n # ±3σ tolerance to avoid flakiness while still catching implementation bugs. sigma = sqrt(p * (1 - p) / n) assert abs(observed - p) < 3 * sigma + 1e-6, (lang, observed, p) def test_language_weights_zero_keys_never_drawn(self) -> None: # U10 weights = {"en": 1.0, "hi": 0.0, "ta": 0.0, "kn": 0.0, "hinglish": 0.0} for s in range(500): assert generate(s, 3, weights).language == "en" # --------------------------------------------------------------------------- # §1.4 Validation exceptions (U11–U19) # --------------------------------------------------------------------------- @pytest.mark.unit class TestValidationExceptions: def test_invalid_language_error_on_unsupported_key(self) -> None: # U11 with pytest.raises(InvalidLanguageError): generate(0, 1, {"hindi": 1.0}) # type: ignore[dict-item] def test_invalid_language_error_on_marathi_key(self) -> None: # U12 with pytest.raises(InvalidLanguageError, match="marathi"): generate(0, 1, {"en": 0.5, "marathi": 0.5}) # type: ignore[dict-item] def test_invalid_language_weight_error_empty_dict(self) -> None: # U13 with pytest.raises(InvalidLanguageWeightError): generate(0, 1, {}) def test_invalid_language_weight_error_negative_value(self) -> None: # U14 with pytest.raises(InvalidLanguageWeightError): generate(0, 1, {"en": 1.5, "hi": -0.5}) def test_invalid_language_weight_error_sum_mismatch_low(self) -> None: # U15 with pytest.raises(InvalidLanguageWeightError): generate(0, 1, {"en": 0.5, "hi": 0.3}) def test_invalid_language_weight_error_sum_mismatch_high(self) -> None: # U16 with pytest.raises(InvalidLanguageWeightError): generate(0, 1, {"en": 0.7, "hi": 0.5}) def test_invalid_language_weight_error_all_zero(self) -> None: # U17 # Direct all-zero (sum 0) triggers the sum-mismatch branch; # the all-zero defensive branch is covered via a weights dict that # normalizes to 1.0 via floating-point noise. We assert via sum=1 # impossible with all zeros, so instead patch: use empty-style. # The design specifies *defensive redundant* check — to exercise it # directly, we call the private validator with a hand-crafted input # that the sum-check would otherwise let through. with pytest.raises(InvalidLanguageWeightError): tg._validate_language_weights( {"en": 0.0, "hi": 0.0, "ta": 0.0, "kn": 0.0, "hinglish": 0.0} ) @pytest.mark.parametrize("bad_stage", [0, 4, -1]) def test_invalid_stage_error(self, bad_stage: int) -> None: # U18 with pytest.raises(InvalidStageError): generate(0, bad_stage, STAGE_1_WEIGHTS) # type: ignore[arg-type] def test_template_file_missing_error(self, tmp_path: Path) -> None: # U19 with pytest.raises(TemplateFileMissingError): load_templates(tmp_path / "does_not_exist.yaml") # --------------------------------------------------------------------------- # §1.5 Unicode NFC (U20–U24) # --------------------------------------------------------------------------- def _single_lang_weights(code: str) -> dict[str, float]: return {"en": 0.0, "hi": 0.0, "ta": 0.0, "kn": 0.0, "hinglish": 0.0} | {code: 1.0} @pytest.mark.unit class TestNFC: def test_seed_utterance_is_nfc_for_every_language(self) -> None: # U20 for code in ("hi", "ta", "kn", "en", "hinglish"): g = generate(7, 2, _single_lang_weights(code)) assert unicodedata.is_normalized("NFC", g.seed_utterance) def test_slotgrid_string_values_are_nfc(self) -> None: # U21 weights = {"en": 0.3, "hi": 0.3, "ta": 0.2, "kn": 0.1, "hinglish": 0.1} for s in range(50): g = generate(s, 3, weights) for v in g.slots.values(): if isinstance(v, str): assert unicodedata.is_normalized("NFC", v), (s, v) def test_i18n_yaml_loaded_values_are_nfc(self, tmp_path: Path) -> None: # U22 _write_fixture_library(tmp_path) lib = load_templates(tmp_path / "templates.yaml") for _lang, block in lib.i18n.items(): for v in block.values(): assert unicodedata.is_normalized("NFC", v) def test_templates_yaml_variant_strings_are_nfc_post_load( self, tmp_path: Path ) -> None: # U23 _write_fixture_library(tmp_path) lib = load_templates(tmp_path / "templates.yaml") for t in lib.templates: for variants in t.language_variants.values(): for v in variants: assert unicodedata.is_normalized("NFC", v) def test_nfd_input_renormalized_to_nfc_on_load(self, tmp_path: Path) -> None: # U24 _write_fixture_library(tmp_path) # Overwrite one variant with NFD-encoded text. nfd_kannada = unicodedata.normalize("NFD", "ಬೆಂಗಳೂರು") assert not unicodedata.is_normalized("NFC", nfd_kannada) or True # NFC may equal NFD for this str yaml_path = tmp_path / "i18n.yaml" data = { "hi": {"cities": {"BLR": unicodedata.normalize("NFD", "बेंगलुरु")}}, "ta": {"cities": {"BLR": "பெங்களூரு"}}, "kn": {"cities": {"BLR": nfd_kannada}}, "en": {"cities": {"BLR": "Bengaluru"}}, "hinglish": {"cities": {"BLR": "Bengaluru"}}, } yaml_path.write_text(yaml.safe_dump(data, allow_unicode=True), encoding="utf-8") lib = load_templates(tmp_path / "templates.yaml") for _lang, block in lib.i18n.items(): for v in block.values(): assert unicodedata.is_normalized("NFC", v) # --------------------------------------------------------------------------- # §1.6 stable_sub_seed domain separation (U25–U28) # --------------------------------------------------------------------------- @pytest.mark.unit class TestSubSeed: def test_stable_sub_seed_formula(self) -> None: # U25 expected = int.from_bytes( hashlib.blake2b(b"42:domain", digest_size=8).digest(), "big" ) assert stable_sub_seed(42, "domain") == expected def test_sub_seed_tags_differ_per_decision(self) -> None: # U26 tags = ["domain", "template", "slots", "language", "variant"] out = {stable_sub_seed(42, t) for t in tags} assert len(out) == 5 def test_sub_seed_stable_across_runs(self) -> None: # U27 a = stable_sub_seed(42, "domain") b = stable_sub_seed(42, "domain") assert a == b def test_sub_seed_different_seed_different_output(self) -> None: # U28 assert stable_sub_seed(42, "domain") != stable_sub_seed(43, "domain") # --------------------------------------------------------------------------- # §1.7 Structural invariants (U29, U30) # --------------------------------------------------------------------------- @pytest.mark.unit class TestStructuralInvariants: def test_seed_utterance_has_no_unresolved_placeholders(self) -> None: # U29 weights = {"en": 0.3, "hi": 0.3, "ta": 0.2, "kn": 0.1, "hinglish": 0.1} for s in range(100): g = generate(s, 3, weights) assert re.search(r"\{[a-z_][a-z0-9_]*\}", g.seed_utterance) is None, ( s, g.seed_utterance, ) def test_seed_utterance_length_leq_280(self) -> None: # U30 weights = {"en": 0.3, "hi": 0.3, "ta": 0.2, "kn": 0.1, "hinglish": 0.1} for s in range(100): g = generate(s, 3, weights) assert len(g.seed_utterance) <= 280 # --------------------------------------------------------------------------- # §1.8 Malformed-fixture raise-site tests (U34–U39) # --------------------------------------------------------------------------- @pytest.mark.unit class TestErrorModes: def test_missing_slot_error(self) -> None: # U34 # Build a library whose variant references an undeclared placeholder by # bypassing load_templates static-scan (we inject directly). bad_variant = "go to {destination}" tmpl = Template( template_id="airline.bad", domain="airline", intent="book_flight", min_stage=1, required_slots=("from", "to", "when"), optional_slots=(), slot_distributions={ "from": SlotDistribution(kind="choices", choices=("HYD",)), "to": SlotDistribution(kind="choices", choices=("BLR",)), "when": SlotDistribution(kind="date"), }, constraints_template={}, drift_slot_tags=(), language_variants={ "en": (bad_variant,), "hi": (bad_variant,), "ta": (bad_variant,), "kn": (bad_variant,), "hinglish": (bad_variant,), }, ) lib = TemplateLibrary( templates=(tmpl,), cities_by_domain={"airline": ("HYD", "BLR")}, i18n={k: {} for k in ("hi", "ta", "kn", "en", "hinglish")}, ) tg.set_library_override(lib) with pytest.raises(MissingSlotError, match="destination"): generate(0, 1, {"en": 1.0, "hi": 0.0, "ta": 0.0, "kn": 0.0, "hinglish": 0.0}) def test_invalid_budget_error_from_step_misalignment(self) -> None: # U35 # Feed _sample_slot_value a deliberately corrupt distribution that # would produce an out-of-range sample. import random dist = SlotDistribution(kind="uniform", low=100.0, high=250.0, step=70.0) class _BadRng(random.Random): def randint(self, a: int, b: int) -> int: # noqa: ARG002 return 3 # 100 + 3*70 = 310 > 250 with pytest.raises(InvalidBudgetError): tg._sample_slot_value(_BadRng(0), "budget_inr", dist, template_id="x") def test_template_schema_error_missing_required_key(self, tmp_path: Path) -> None: # U36 (tmp_path / "templates.yaml").write_text( yaml.safe_dump([{"template_id": "x"}]), encoding="utf-8" ) with pytest.raises(TemplateSchemaError): load_templates(tmp_path / "templates.yaml") def test_template_schema_error_bad_step_grid(self, tmp_path: Path) -> None: # U37 bad_template: dict[str, Any] = { "template_id": "airline.bad", "domain": "airline", "intent": "book_flight", "min_stage": 1, "required_slots": [], "optional_slots": [], "constraints_template": { "budget_inr": {"distribution": "uniform", "low": 3000, "high": 15000, "step": 700} }, "drift_slot_tags": [], "language_variants": { "en": ["hello"], "hi": ["नमस्ते"], "ta": ["வணக்கம்"], "kn": ["ನಮಸ್ಕಾರ"], "hinglish": ["namaste"], }, } (tmp_path / "templates.yaml").write_text( yaml.safe_dump([bad_template], allow_unicode=True), encoding="utf-8" ) with pytest.raises(TemplateSchemaError, match="misaligned"): load_templates(tmp_path / "templates.yaml") def test_unicode_normalization_error_defensive(self, monkeypatch: pytest.MonkeyPatch) -> None: # U38 from cells import step_07_task_generator as mod monkeypatch.setattr(mod.unicodedata, "is_normalized", lambda *a, **k: False) with pytest.raises(UnicodeNormalizationError): mod._assert_nfc("anything", where="test") def test_no_variant_for_language_error(self) -> None: # U39 # Build a template with an empty variant tuple for Tamil (bypass loader). tmpl = Template( template_id="airline.missing_ta", domain="airline", intent="book_flight", min_stage=1, required_slots=("from", "to", "when"), optional_slots=(), slot_distributions={ "from": SlotDistribution(kind="choices", choices=("HYD",)), "to": SlotDistribution(kind="choices", choices=("BLR",)), "when": SlotDistribution(kind="date"), }, constraints_template={}, drift_slot_tags=(), language_variants={ "en": ("from {from} to {to} on {when}",), "hi": ("{from} से {to} {when}",), "ta": (), # intentionally empty "kn": ("{from} {to} {when}",), "hinglish": ("{from} to {to} {when}",), }, ) lib = TemplateLibrary( templates=(tmpl,), cities_by_domain={"airline": ("HYD", "BLR")}, i18n={k: {} for k in ("hi", "ta", "kn", "en", "hinglish")}, ) tg.set_library_override(lib) weights = {"en": 0.0, "hi": 0.0, "ta": 1.0, "kn": 0.0, "hinglish": 0.0} with pytest.raises(NoVariantForLanguageError): generate(0, 1, weights) # --------------------------------------------------------------------------- # §2 Property tests (P1–P6) # --------------------------------------------------------------------------- def _language_weights_strategy() -> st.SearchStrategy[dict[str, float]]: langs = ("hi", "ta", "kn", "en", "hinglish") @st.composite def _impl(draw: st.DrawFn) -> dict[str, float]: raw = [ draw(st.floats(min_value=0.01, max_value=1.0, allow_nan=False, allow_infinity=False)) for _ in langs ] total = sum(raw) return {lang: r / total for lang, r in zip(langs, raw, strict=True)} return _impl() @pytest.mark.property @given( seed=st.integers(min_value=0, max_value=2**62), stage=st.sampled_from([1, 2, 3]), weights=_language_weights_strategy(), ) @settings(max_examples=150, deadline=None) def test_generate_is_pure(seed: int, stage: int, weights: dict[str, float]) -> None: # P1 a = generate(seed, stage, weights) # type: ignore[arg-type] b = generate(seed, stage, weights) # type: ignore[arg-type] assert a == b assert a.seed_utterance == b.seed_utterance @pytest.mark.property @pytest.mark.slow def test_procedural_space_uniqueness_scan() -> None: # P2 (scaled down — slow) weights = {"en": 0.2, "hi": 0.2, "ta": 0.2, "kn": 0.2, "hinglish": 0.2} # Walk 5,000 distinct seeds (200k is gated behind -m slow in CI nightly). utterances = set() for s in range(5_000): utterances.add(generate(s, 3, weights).seed_utterance) # Collision rate < 10% at n=5k given the 4 domains × 5 templates × etc. assert len(utterances) >= 5_000 * 0.8 @pytest.mark.property def test_language_distribution_chi_square_n10000() -> None: # P3 weights = {"en": 0.3, "hi": 0.3, "ta": 0.2, "kn": 0.1, "hinglish": 0.1} n = 10_000 observed = Counter(generate(s, 3, weights).language for s in range(n)) expected = {lang: p * n for lang, p in weights.items()} chi2 = sum( ((observed.get(lang, 0) - expected[lang]) ** 2) / expected[lang] for lang in weights ) # df=4, alpha=0.001 critical value ≈ 18.47 assert chi2 < 18.47, f"chi-square {chi2:.2f} rejects null" @pytest.mark.property @given(seed=st.integers(min_value=0, max_value=10_000)) @settings(max_examples=100, deadline=None) def test_stage_template_pool_monotone(seed: int) -> None: # P4 weights = {"en": 1.0, "hi": 0.0, "ta": 0.0, "kn": 0.0, "hinglish": 0.0} g1 = generate(seed, 1, weights) assert len(g1.constraints) <= 2 @pytest.mark.property @given( seed=st.integers(min_value=0, max_value=2**62), stage=st.sampled_from([1, 2, 3]), weights=_language_weights_strategy(), ) @settings(max_examples=300, deadline=None) def test_seed_utterance_always_nfc( seed: int, stage: int, weights: dict[str, float] ) -> None: # P5 g = generate(seed, stage, weights) # type: ignore[arg-type] assert unicodedata.is_normalized("NFC", g.seed_utterance) for v in g.slots.values(): if isinstance(v, str): assert unicodedata.is_normalized("NFC", v) @pytest.mark.property @given( seed=st.integers(min_value=0, max_value=10_000), stage=st.sampled_from([1, 2, 3]), ) @settings(max_examples=200, deadline=None) def test_budget_within_declared_range(seed: int, stage: int) -> None: # P6 weights = {"en": 1.0, "hi": 0.0, "ta": 0.0, "kn": 0.0, "hinglish": 0.0} g = generate(seed, stage, weights) # type: ignore[arg-type] if "budget_inr" in g.constraints: # Find any template in the library whose budget range could contain it. lib = tg._get_library() match = False for t in lib.templates: if "budget_inr" in t.constraints_template: dist = t.constraints_template["budget_inr"] assert dist.low is not None and dist.high is not None if dist.low <= g.constraints["budget_inr"] <= dist.high: match = True break assert match, (g.constraints, g.domain) # --------------------------------------------------------------------------- # §3 Integration tests (I1–I5) — use real fixture files written on disk # --------------------------------------------------------------------------- def _write_fixture_library(tmp_path: Path) -> None: """Author a minimal real templates.yaml + i18n.yaml pair.""" templates: list[dict[str, Any]] = [ { "template_id": "airline.book.fixture_v1", "domain": "airline", "intent": "book_flight", "min_stage": 1, "required_slots": ["from", "to", "when"], "optional_slots": [], "slot_distributions": { "from": {"choices": ["HYD", "BLR", "DEL", "BOM", "MAA"]}, "to": {"choices": ["HYD", "BLR", "DEL", "BOM", "MAA"]}, "when": {"distribution": "date"}, }, "constraints_template": { "budget_inr": { "distribution": "uniform", "low": 3000, "high": 15000, "step": 500, }, "time_window": { "choices": ["morning", "afternoon", "evening", "late_night"] }, }, "drift_slot_tags": ["price", "total_fare_inr"], "language_variants": { "hinglish": [ "Bhai {when} ko {from} se {to}, {budget_inr} rupees max, {time_window}" ], "hi": [ "{when} को {from} से {to}, ₹{budget_inr} से कम, {time_window}" ], "ta": [ "{when} அன்று {from} லிருந்து {to}, ₹{budget_inr} கீழ், {time_window}" ], "kn": [ "{when} ರಂದು {from} ಇಂದ {to}, ₹{budget_inr} ಒಳಗೆ, {time_window}" ], "en": [ "Flight from {from} to {to} on {when}, under ₹{budget_inr}, {time_window}" ], }, }, { "template_id": "cab.ride.fixture_v1", "domain": "cab", "intent": "book_cab", "min_stage": 1, "required_slots": ["pickup", "drop", "when"], "optional_slots": [], "slot_distributions": { "pickup": {"choices": ["Koramangala", "Indiranagar", "Whitefield"]}, "drop": {"choices": ["Koramangala", "Indiranagar", "Whitefield"]}, "when": {"distribution": "date"}, }, "constraints_template": { "budget_inr": { "distribution": "uniform", "low": 200, "high": 2000, "step": 50, } }, "drift_slot_tags": ["fare_inr"], "language_variants": { "hinglish": ["{when} ko {pickup} se {drop} cab, {budget_inr} ke andar"], "hi": ["{when} को {pickup} से {drop}, ₹{budget_inr} के अंदर"], "ta": ["{when} அன்று {pickup} லிருந்து {drop}, ₹{budget_inr} கீழ்"], "kn": ["{when} ರಂದು {pickup} ಇಂದ {drop}, ₹{budget_inr} ಒಳಗೆ"], "en": ["Cab {pickup} to {drop} on {when}, under ₹{budget_inr}"], }, }, { "template_id": "restaurant.order.fixture_v1", "domain": "restaurant", "intent": "order_food", "min_stage": 1, "required_slots": ["city", "cuisine", "when"], "optional_slots": [], "slot_distributions": { "city": {"choices": ["HYD", "BLR", "DEL"]}, "cuisine": {"choices": ["Biryani", "Dosa", "Pizza"]}, "when": {"distribution": "date"}, }, "constraints_template": { "budget_inr": { "distribution": "uniform", "low": 200, "high": 1000, "step": 50, }, "veg_only": {"distribution": "bool"}, }, "drift_slot_tags": ["min_order"], "language_variants": { "hinglish": [ "{when} ko {city} mein {cuisine}, {budget_inr} max, veg={veg_only}" ], "hi": [ "{when} को {city} में {cuisine}, ₹{budget_inr}, veg={veg_only}" ], "ta": [ "{when} அன்று {city} இல் {cuisine}, ₹{budget_inr}, veg={veg_only}" ], "kn": [ "{when} ರಂದು {city} ನಲ್ಲಿ {cuisine}, ₹{budget_inr}, veg={veg_only}" ], "en": [ "Order {cuisine} in {city} on {when}, ₹{budget_inr}, veg={veg_only}" ], }, }, { "template_id": "hotel.book.fixture_v1", "domain": "hotel", "intent": "book_hotel", "min_stage": 1, "required_slots": ["city", "checkin", "checkout"], "optional_slots": [], "slot_distributions": { "city": {"choices": ["HYD", "BLR", "GOI"]}, "checkin": {"distribution": "date"}, "checkout": {"distribution": "date"}, }, "constraints_template": { "budget_inr": { "distribution": "uniform", "low": 2000, "high": 10000, "step": 500, } }, "drift_slot_tags": ["cancel_window"], "language_variants": { "hinglish": ["{city} {checkin}-{checkout}, ₹{budget_inr}/night"], "hi": ["{city} {checkin}-{checkout}, ₹{budget_inr} प्रति रात"], "ta": ["{city} {checkin}-{checkout}, ₹{budget_inr} இரவுக்கு"], "kn": ["{city} {checkin}-{checkout}, ₹{budget_inr} ಒಂದು ರಾತ್ರಿ"], "en": ["{city} {checkin} to {checkout}, ₹{budget_inr} per night"], }, }, { "template_id": "airline.book.compound_v1", "domain": "airline", "intent": "book_flight", "min_stage": 3, "required_slots": ["from", "to", "when"], "optional_slots": [], "slot_distributions": { "from": {"choices": ["HYD", "BLR", "DEL"]}, "to": {"choices": ["HYD", "BLR", "DEL"]}, "when": {"distribution": "date"}, }, "constraints_template": { "budget_inr": { "distribution": "uniform", "low": 3000, "high": 15000, "step": 500, }, "time_window": { "choices": ["morning", "afternoon", "evening", "late_night"] }, "passenger_count": { "distribution": "uniform", "low": 1, "high": 4, "step": 1, }, }, "drift_slot_tags": ["price", "passenger_count"], "language_variants": { "hinglish": [ "{when} ko {from} se {to}, {passenger_count} log, ₹{budget_inr}, {time_window}" ], "hi": [ "{when} को {from} से {to}, {passenger_count} लोग, ₹{budget_inr}, {time_window}" ], "ta": [ "{when} அன்று {from} லிருந்து {to}, {passenger_count} பேர், ₹{budget_inr}, {time_window}" ], "kn": [ "{when} ರಂದು {from} ಇಂದ {to}, {passenger_count} ಜನ, ₹{budget_inr}, {time_window}" ], "en": [ "{from} to {to} on {when} for {passenger_count} pax, ₹{budget_inr}, {time_window}" ], }, }, ] (tmp_path / "templates.yaml").write_text( yaml.safe_dump(templates, allow_unicode=True, sort_keys=False), encoding="utf-8" ) i18n: dict[str, Any] = { "hi": { "cities": {"BLR": "बेंगलुरु", "MAA": "चेन्नई", "HYD": "हैदराबाद"}, "weekdays": {"monday": "सोमवार"}, }, "ta": { "cities": {"BLR": "பெங்களூரு", "MAA": "சென்னை"}, "weekdays": {"monday": "திங்கட்கிழமை"}, }, "kn": { "cities": {"BLR": "ಬೆಂಗಳೂರು", "MAA": "ಚೆನ್ನೈ"}, "weekdays": {"monday": "ಸೋಮವಾರ"}, }, "en": {"cities": {"BLR": "Bengaluru"}}, "hinglish": {"cities": {"BLR": "Bengaluru"}}, } (tmp_path / "i18n.yaml").write_text( yaml.safe_dump(i18n, allow_unicode=True, sort_keys=False), encoding="utf-8" ) def _valid_goal_spec(g: GoalSpec) -> None: assert dataclasses.is_dataclass(g) assert g.domain in ("airline", "cab", "restaurant", "hotel") assert g.language in ("hi", "ta", "kn", "en", "hinglish") assert unicodedata.is_normalized("NFC", g.seed_utterance) assert len(g.seed_utterance) <= 280 assert re.search(r"\{[a-z_][a-z0-9_]*\}", g.seed_utterance) is None @pytest.mark.integration class TestIntegration: def test_load_templates_from_fixture(self, tmp_path: Path) -> None: # I1 _write_fixture_library(tmp_path) lib = load_templates(tmp_path / "templates.yaml") assert isinstance(lib, TemplateLibrary) assert len({t.domain for t in lib.templates}) == 4 assert len(lib.templates) == 5 for lang in ("hi", "ta", "kn", "en", "hinglish"): assert lang in lib.i18n def test_100_briefs_pass_goal_spec_invariants(self, tmp_path: Path) -> None: # I2 _write_fixture_library(tmp_path) lib = load_templates(tmp_path / "templates.yaml") tg.set_library_override(lib) weights = {"en": 0.3, "hi": 0.3, "ta": 0.2, "kn": 0.1, "hinglish": 0.1} for s in range(100): g = generate(s, 3, weights) _valid_goal_spec(g) def test_enumerate_variants_stable_order(self, tmp_path: Path) -> None: # I3 _write_fixture_library(tmp_path) lib = load_templates(tmp_path / "templates.yaml") tg.set_library_override(lib) weights = {"en": 0.2, "hi": 0.2, "ta": 0.2, "kn": 0.2, "hinglish": 0.2} a = list(enumerate_variants(limit=200, stage=3, language_weights=weights)) b = list(enumerate_variants(limit=200, stage=3, language_weights=weights)) assert [g.seed_utterance for g in a] == [g.seed_utterance for g in b] @pytest.mark.parametrize( "lang,expected_block,forbidden_block", [ ("hi", (0x0900, 0x097F), (0x0B80, 0x0BFF)), ("ta", (0x0B80, 0x0BFF), (0x0900, 0x097F)), ("kn", (0x0C80, 0x0CFF), (0x0900, 0x097F)), ], ) def test_indic_script_isolation( self, tmp_path: Path, lang: str, expected_block: tuple[int, int], forbidden_block: tuple[int, int], ) -> None: # I4 _write_fixture_library(tmp_path) lib = load_templates(tmp_path / "templates.yaml") tg.set_library_override(lib) weights = {c: (1.0 if c == lang else 0.0) for c in ("hi", "ta", "kn", "en", "hinglish")} for s in range(50): g = generate(s, 2, weights) lo, hi = expected_block assert any(lo <= ord(c) <= hi for c in g.seed_utterance), g.seed_utterance fo, fh = forbidden_block assert not any(fo <= ord(c) <= fh for c in g.seed_utterance), g.seed_utterance def test_hinglish_never_contains_devanagari(self, tmp_path: Path) -> None: # I5 _write_fixture_library(tmp_path) lib = load_templates(tmp_path / "templates.yaml") tg.set_library_override(lib) weights = {"hinglish": 1.0, "hi": 0.0, "ta": 0.0, "kn": 0.0, "en": 0.0} for s in range(100): g = generate(s, 3, weights) assert not any(0x0900 <= ord(c) <= 0x097F for c in g.seed_utterance)