Spaces:
Paused
Paused
| """Tests for cells/step_07_task_generator.py. | |
| Implements docs/tests/task_generator_tests.md: | |
| - 36 unit tests (U1–U30, U34–U39) | |
| - 6 hypothesis property tests (P1–P6) | |
| - 5 integration tests (I1–I5) | |
| """ | |
| from __future__ import annotations | |
| import dataclasses | |
| import hashlib | |
| import re | |
| import unicodedata | |
| from collections import Counter | |
| from math import sqrt | |
| from typing import TYPE_CHECKING, Any | |
| import pytest | |
| import yaml | |
| from hypothesis import given, settings | |
| from hypothesis import strategies as st | |
| from cells import step_07_task_generator as tg | |
| from cells.step_07_task_generator import ( | |
| InvalidBudgetError, | |
| InvalidLanguageError, | |
| InvalidLanguageWeightError, | |
| InvalidStageError, | |
| MissingSlotError, | |
| NoVariantForLanguageError, | |
| SlotDistribution, | |
| Template, | |
| TemplateFileMissingError, | |
| TemplateLibrary, | |
| TemplateSchemaError, | |
| UnicodeNormalizationError, | |
| enumerate_variants, | |
| generate, | |
| load_templates, | |
| stable_sub_seed, | |
| ) | |
| if TYPE_CHECKING: | |
| from pathlib import Path | |
| from cells.step_04_models import GoalSpec | |
| # --------------------------------------------------------------------------- | |
| # Shared fixtures / weight constants (§5.3 of the test plan) | |
| # --------------------------------------------------------------------------- | |
# Per-stage language sampling weights (keys are language codes; each mapping
# sums to 1.0). Stage 1 gives Tamil and Kannada zero weight; stages 2 and 3
# share the same values but are kept as separate objects.
STAGE_1_WEIGHTS: dict[str, float] = dict(
    en=0.50, hi=0.30, hinglish=0.20, ta=0.00, kn=0.00
)
STAGE_2_WEIGHTS: dict[str, float] = dict(
    en=0.30, hi=0.30, hinglish=0.20, ta=0.10, kn=0.10
)
STAGE_3_WEIGHTS: dict[str, float] = dict(
    en=0.30, hi=0.30, hinglish=0.20, ta=0.10, kn=0.10
)
@pytest.fixture(autouse=True)
def _install_test_library(tmp_path_factory: pytest.TempPathFactory) -> Any:
    """Install a fully-wired fixture library for every test.

    Registered as an autouse fixture so it wraps each test without being
    requested by name: before the test it clears any previous override and
    the library cache, writes the fixture YAML files into a fresh temporary
    directory, loads them and installs the result as the library override;
    after the test it clears the override and the cache again.

    Tests that need the production ``data/task_briefs/templates.yaml`` or a
    custom library override must call ``tg.set_library_override()`` inside
    the test body — this fixture only sets the default.
    """
    # Start from a clean slate in case an earlier test left state behind.
    tg.set_library_override(None)
    tg.reset_library_cache()

    fixture_dir = tmp_path_factory.mktemp("task_gen_fixture")
    _write_fixture_library(fixture_dir)
    lib = load_templates(fixture_dir / "templates.yaml")
    tg.set_library_override(lib)

    yield

    # Teardown: drop whatever override the test (or this fixture) installed.
    tg.set_library_override(None)
    tg.reset_library_cache()
| # --------------------------------------------------------------------------- | |
| # §1.1 Determinism (U1–U5) | |
| # --------------------------------------------------------------------------- | |
class TestDeterminism:
    """U1–U5: ``generate`` is repeatable for fixed inputs and returns a frozen dataclass."""

    def test_generate_same_seed_same_goalspec(self) -> None:  # U1
        baseline = generate(42, 1, STAGE_1_WEIGHTS)
        repeats = [generate(42, 1, STAGE_1_WEIGHTS) for _ in range(99)]
        assert all(spec == baseline for spec in repeats)

    def test_generate_byte_identical_seed_utterance_after_nfc(self) -> None:  # U2
        def utterance_bytes() -> bytes:
            return generate(42, 1, STAGE_1_WEIGHTS).seed_utterance.encode("utf-8")

        baseline = utterance_bytes()
        assert all(utterance_bytes() == baseline for _ in range(99))

    def test_generate_different_seeds_different_episodes(self) -> None:  # U3
        distinct = {
            generate(seed, 3, STAGE_3_WEIGHTS).seed_utterance for seed in range(100)
        }
        assert len(distinct) > 90

    def test_generate_stage_changes_template_pool(self) -> None:  # U4
        # Same seed and weights, different stage: only the per-stage
        # constraint ceilings are checked here.
        stage_one = generate(42, 1, STAGE_3_WEIGHTS)
        stage_three = generate(42, 3, STAGE_3_WEIGHTS)
        assert len(stage_one.constraints) <= 2
        assert len(stage_three.constraints) <= 4

    def test_generate_returns_frozen_goalspec(self) -> None:  # U5
        spec = generate(42, 1, STAGE_1_WEIGHTS)
        assert dataclasses.is_dataclass(spec)
        assert spec.__dataclass_params__.frozen is True  # type: ignore[attr-defined]
| # --------------------------------------------------------------------------- | |
| # §1.2 Stage-aware constraint counts (U6–U8) | |
| # --------------------------------------------------------------------------- | |
class TestStageConstraintCounts:
    """U6–U8: the number of constraints never exceeds the per-stage ceiling."""

    def test_stage_1_constraint_count_leq_2(self) -> None:  # U6
        for seed in range(200):
            constraints = generate(seed, 1, STAGE_1_WEIGHTS).constraints
            assert len(constraints) <= 2, (seed, constraints)

    def test_stage_2_constraint_count_leq_3(self) -> None:  # U7
        for seed in range(200):
            constraints = generate(seed, 2, STAGE_2_WEIGHTS).constraints
            assert len(constraints) <= 3, (seed, constraints)

    def test_stage_3_constraint_count_leq_4(self) -> None:  # U8
        for seed in range(200):
            constraints = generate(seed, 3, STAGE_3_WEIGHTS).constraints
            assert len(constraints) <= 4, (seed, constraints)
| # --------------------------------------------------------------------------- | |
| # §1.3 Language-weight distribution (U9, U10) | |
| # --------------------------------------------------------------------------- | |
class TestLanguageWeightDistribution:
    """U9–U10: sampled languages follow the requested weights."""

    def test_language_weights_sampled_distribution_matches_at_n1000(self) -> None:  # U9
        target = {"en": 0.3, "hi": 0.3, "ta": 0.2, "kn": 0.1, "hinglish": 0.1}
        draws = 1000
        tally = Counter(generate(seed, 3, target).language for seed in range(draws))
        for code, prob in target.items():
            freq = tally.get(code, 0) / draws
            # ±3σ tolerance to avoid flakiness while still catching implementation bugs.
            sigma = sqrt(prob * (1 - prob) / draws)
            assert abs(freq - prob) < 3 * sigma + 1e-6, (code, freq, prob)

    def test_language_weights_zero_keys_never_drawn(self) -> None:  # U10
        english_only = {"en": 1.0, "hi": 0.0, "ta": 0.0, "kn": 0.0, "hinglish": 0.0}
        for seed in range(500):
            drawn = generate(seed, 3, english_only).language
            assert drawn == "en"
| # --------------------------------------------------------------------------- | |
| # §1.4 Validation exceptions (U11–U19) | |
| # --------------------------------------------------------------------------- | |
class TestValidationExceptions:
    """U11–U19: invalid inputs are rejected with the matching exception type."""

    def test_invalid_language_error_on_unsupported_key(self) -> None:  # U11
        with pytest.raises(InvalidLanguageError):
            generate(0, 1, {"hindi": 1.0})  # type: ignore[dict-item]

    def test_invalid_language_error_on_marathi_key(self) -> None:  # U12
        # The offending key must be named in the error message.
        with pytest.raises(InvalidLanguageError, match="marathi"):
            generate(0, 1, {"en": 0.5, "marathi": 0.5})  # type: ignore[dict-item]

    def test_invalid_language_weight_error_empty_dict(self) -> None:  # U13
        with pytest.raises(InvalidLanguageWeightError):
            generate(0, 1, {})

    def test_invalid_language_weight_error_negative_value(self) -> None:  # U14
        # Sums to 1.0, so only the negative entry can trigger the rejection.
        with pytest.raises(InvalidLanguageWeightError):
            generate(0, 1, {"en": 1.5, "hi": -0.5})

    def test_invalid_language_weight_error_sum_mismatch_low(self) -> None:  # U15
        with pytest.raises(InvalidLanguageWeightError):
            generate(0, 1, {"en": 0.5, "hi": 0.3})

    def test_invalid_language_weight_error_sum_mismatch_high(self) -> None:  # U16
        with pytest.raises(InvalidLanguageWeightError):
            generate(0, 1, {"en": 0.7, "hi": 0.5})

    def test_invalid_language_weight_error_all_zero(self) -> None:  # U17
        # All-zero weights must be rejected. The private validator is called
        # directly; from here it is not observable whether the sum-mismatch
        # check or the defensive all-zero guard is the branch that raises.
        with pytest.raises(InvalidLanguageWeightError):
            tg._validate_language_weights(
                {"en": 0.0, "hi": 0.0, "ta": 0.0, "kn": 0.0, "hinglish": 0.0}
            )

    # NOTE(review): the original parameter list was lost; these values assume
    # the valid stages are exactly 1, 2 and 3 (the only ones used elsewhere in
    # this module) — confirm against the test plan.
    @pytest.mark.parametrize("bad_stage", [-1, 0, 4])
    def test_invalid_stage_error(self, bad_stage: int) -> None:  # U18
        with pytest.raises(InvalidStageError):
            generate(0, bad_stage, STAGE_1_WEIGHTS)  # type: ignore[arg-type]

    def test_template_file_missing_error(self, tmp_path: Path) -> None:  # U19
        with pytest.raises(TemplateFileMissingError):
            load_templates(tmp_path / "does_not_exist.yaml")
| # --------------------------------------------------------------------------- | |
| # §1.5 Unicode NFC (U20–U24) | |
| # --------------------------------------------------------------------------- | |
| def _single_lang_weights(code: str) -> dict[str, float]: | |
| return {"en": 0.0, "hi": 0.0, "ta": 0.0, "kn": 0.0, "hinglish": 0.0} | {code: 1.0} | |
class TestNFC:
    """U20–U24: every string produced or loaded is in Unicode NFC form."""

    def test_seed_utterance_is_nfc_for_every_language(self) -> None:  # U20
        for code in ("hi", "ta", "kn", "en", "hinglish"):
            g = generate(7, 2, _single_lang_weights(code))
            assert unicodedata.is_normalized("NFC", g.seed_utterance)

    def test_slotgrid_string_values_are_nfc(self) -> None:  # U21
        weights = {"en": 0.3, "hi": 0.3, "ta": 0.2, "kn": 0.1, "hinglish": 0.1}
        for s in range(50):
            g = generate(s, 3, weights)
            for v in g.slots.values():
                # Non-string slot values are skipped.
                if isinstance(v, str):
                    assert unicodedata.is_normalized("NFC", v), (s, v)

    def test_i18n_yaml_loaded_values_are_nfc(self, tmp_path: Path) -> None:  # U22
        _write_fixture_library(tmp_path)
        lib = load_templates(tmp_path / "templates.yaml")
        for block in lib.i18n.values():
            for v in block.values():
                assert unicodedata.is_normalized("NFC", v)

    def test_templates_yaml_variant_strings_are_nfc_post_load(
        self, tmp_path: Path
    ) -> None:  # U23
        _write_fixture_library(tmp_path)
        lib = load_templates(tmp_path / "templates.yaml")
        for t in lib.templates:
            for variants in t.language_variants.values():
                for v in variants:
                    assert unicodedata.is_normalized("NFC", v)

    def test_nfd_input_renormalized_to_nfc_on_load(self, tmp_path: Path) -> None:  # U24
        _write_fixture_library(tmp_path)
        # Kannada "Chennai" ends in VOWEL SIGN AI (U+0CC8), which canonically
        # decomposes to U+0CC6 U+0CD6, so its NFD form really differs from NFC.
        # (The Bengaluru spellings used for the other languages contain no
        # decomposable code points, so NFD leaves them unchanged.)
        nfd_kannada = unicodedata.normalize(
            "NFD", "\u0c9a\u0cc6\u0ca8\u0ccd\u0ca8\u0cc8"
        )
        # Precondition: the text written to disk is genuinely not NFC.
        assert not unicodedata.is_normalized("NFC", nfd_kannada)
        # Overwrite i18n.yaml so it carries the NFD-encoded value.
        yaml_path = tmp_path / "i18n.yaml"
        data = {
            "hi": {"cities": {"BLR": unicodedata.normalize("NFD", "बेंगलुरु")}},
            "ta": {"cities": {"BLR": "பெங்களூரு"}},
            "kn": {"cities": {"BLR": "ಬೆಂಗಳೂರು", "MAA": nfd_kannada}},
            "en": {"cities": {"BLR": "Bengaluru"}},
            "hinglish": {"cities": {"BLR": "Bengaluru"}},
        }
        yaml_path.write_text(yaml.safe_dump(data, allow_unicode=True), encoding="utf-8")
        lib = load_templates(tmp_path / "templates.yaml")
        for block in lib.i18n.values():
            for v in block.values():
                assert unicodedata.is_normalized("NFC", v)
| # --------------------------------------------------------------------------- | |
| # §1.6 stable_sub_seed domain separation (U25–U28) | |
| # --------------------------------------------------------------------------- | |
class TestSubSeed:
    """U25–U28: ``stable_sub_seed`` is a pure function of ``(seed, tag)``."""

    def test_stable_sub_seed_formula(self) -> None:  # U25
        # Reference value: big-endian integer of the 8-byte BLAKE2b digest
        # of b"<seed>:<tag>".
        digest = hashlib.blake2b(b"42:domain", digest_size=8).digest()
        assert stable_sub_seed(42, "domain") == int.from_bytes(digest, "big")

    def test_sub_seed_tags_differ_per_decision(self) -> None:  # U26
        per_tag = [
            stable_sub_seed(42, tag)
            for tag in ("domain", "template", "slots", "language", "variant")
        ]
        assert len(set(per_tag)) == 5

    def test_sub_seed_stable_across_runs(self) -> None:  # U27
        first = stable_sub_seed(42, "domain")
        second = stable_sub_seed(42, "domain")
        assert first == second

    def test_sub_seed_different_seed_different_output(self) -> None:  # U28
        for_42 = stable_sub_seed(42, "domain")
        for_43 = stable_sub_seed(43, "domain")
        assert for_42 != for_43
| # --------------------------------------------------------------------------- | |
| # §1.7 Structural invariants (U29, U30) | |
| # --------------------------------------------------------------------------- | |
class TestStructuralInvariants:
    """U29–U30: utterances are fully rendered and at most 280 characters long."""

    def test_seed_utterance_has_no_unresolved_placeholders(self) -> None:  # U29
        mix = {"en": 0.3, "hi": 0.3, "ta": 0.2, "kn": 0.1, "hinglish": 0.1}
        # A leftover ``{slot_name}`` token means a placeholder was not filled.
        placeholder = re.compile(r"\{[a-z_][a-z0-9_]*\}")
        for seed in range(100):
            text = generate(seed, 3, mix).seed_utterance
            assert placeholder.search(text) is None, (seed, text)

    def test_seed_utterance_length_leq_280(self) -> None:  # U30
        mix = {"en": 0.3, "hi": 0.3, "ta": 0.2, "kn": 0.1, "hinglish": 0.1}
        for seed in range(100):
            text = generate(seed, 3, mix).seed_utterance
            assert len(text) <= 280
| # --------------------------------------------------------------------------- | |
| # §1.8 Malformed-fixture raise-site tests (U34–U39) | |
| # --------------------------------------------------------------------------- | |
class TestErrorModes:
    """U34–U39: each error type is raised for a deliberately malformed input."""

    @staticmethod
    def _airline_only_library(
        template_id: str, language_variants: dict[str, tuple[str, ...]]
    ) -> TemplateLibrary:
        """Build a one-template airline library in memory, bypassing the loader."""
        template = Template(
            template_id=template_id,
            domain="airline",
            intent="book_flight",
            min_stage=1,
            required_slots=("from", "to", "when"),
            optional_slots=(),
            slot_distributions={
                "from": SlotDistribution(kind="choices", choices=("HYD",)),
                "to": SlotDistribution(kind="choices", choices=("BLR",)),
                "when": SlotDistribution(kind="date"),
            },
            constraints_template={},
            drift_slot_tags=(),
            language_variants=language_variants,
        )
        return TemplateLibrary(
            templates=(template,),
            cities_by_domain={"airline": ("HYD", "BLR")},
            i18n={k: {} for k in ("hi", "ta", "kn", "en", "hinglish")},
        )

    def test_missing_slot_error(self) -> None:  # U34
        # The variant references a placeholder that is not a declared slot;
        # injecting the library directly bypasses load_templates' static scan.
        bad_variant = "go to {destination}"
        variants = {
            code: (bad_variant,) for code in ("en", "hi", "ta", "kn", "hinglish")
        }
        tg.set_library_override(self._airline_only_library("airline.bad", variants))
        english_only = {"en": 1.0, "hi": 0.0, "ta": 0.0, "kn": 0.0, "hinglish": 0.0}
        with pytest.raises(MissingSlotError, match="destination"):
            generate(0, 1, english_only)

    def test_invalid_budget_error_from_step_misalignment(self) -> None:  # U35
        # Hand _sample_slot_value a distribution plus an RNG stub that together
        # yield an out-of-range sample.
        import random

        class _BadRng(random.Random):
            def randint(self, a: int, b: int) -> int:  # noqa: ARG002
                return 3  # 100 + 3*70 = 310 > 250

        corrupt = SlotDistribution(kind="uniform", low=100.0, high=250.0, step=70.0)
        with pytest.raises(InvalidBudgetError):
            tg._sample_slot_value(_BadRng(0), "budget_inr", corrupt, template_id="x")

    def test_template_schema_error_missing_required_key(self, tmp_path: Path) -> None:  # U36
        target = tmp_path / "templates.yaml"
        target.write_text(yaml.safe_dump([{"template_id": "x"}]), encoding="utf-8")
        with pytest.raises(TemplateSchemaError):
            load_templates(target)

    def test_template_schema_error_bad_step_grid(self, tmp_path: Path) -> None:  # U37
        # 15000 - 3000 is not a multiple of the 700 step.
        misaligned_budget = {
            "distribution": "uniform",
            "low": 3000,
            "high": 15000,
            "step": 700,
        }
        bad_template: dict[str, Any] = {
            "template_id": "airline.bad",
            "domain": "airline",
            "intent": "book_flight",
            "min_stage": 1,
            "required_slots": [],
            "optional_slots": [],
            "constraints_template": {"budget_inr": misaligned_budget},
            "drift_slot_tags": [],
            "language_variants": {
                "en": ["hello"],
                "hi": ["नमस्ते"],
                "ta": ["வணக்கம்"],
                "kn": ["ನಮಸ್ಕಾರ"],
                "hinglish": ["namaste"],
            },
        }
        target = tmp_path / "templates.yaml"
        target.write_text(
            yaml.safe_dump([bad_template], allow_unicode=True), encoding="utf-8"
        )
        with pytest.raises(TemplateSchemaError, match="misaligned"):
            load_templates(target)

    def test_unicode_normalization_error_defensive(self, monkeypatch: pytest.MonkeyPatch) -> None:  # U38
        # Force the normalization check to report failure for any input.
        monkeypatch.setattr(tg.unicodedata, "is_normalized", lambda *a, **k: False)
        with pytest.raises(UnicodeNormalizationError):
            tg._assert_nfc("anything", where="test")

    def test_no_variant_for_language_error(self) -> None:  # U39
        # Tamil gets an empty variant tuple (only possible by bypassing the loader).
        variants = {
            "en": ("from {from} to {to} on {when}",),
            "hi": ("{from} से {to} {when}",),
            "ta": (),  # intentionally empty
            "kn": ("{from} {to} {when}",),
            "hinglish": ("{from} to {to} {when}",),
        }
        tg.set_library_override(
            self._airline_only_library("airline.missing_ta", variants)
        )
        tamil_only = {"en": 0.0, "hi": 0.0, "ta": 1.0, "kn": 0.0, "hinglish": 0.0}
        with pytest.raises(NoVariantForLanguageError):
            generate(0, 1, tamil_only)
| # --------------------------------------------------------------------------- | |
| # §2 Property tests (P1–P6) | |
| # --------------------------------------------------------------------------- | |
def _language_weights_strategy() -> st.SearchStrategy[dict[str, float]]:
    """Return a strategy producing valid weight mappings over all five languages.

    Each language gets a raw weight in [0.01, 1.0]; the raw weights are then
    divided by their total so the mapping sums to 1.0 (up to float rounding)
    and every language has a strictly positive weight.
    """
    langs = ("hi", "ta", "kn", "en", "hinglish")

    # ``@st.composite`` is what supplies ``draw``; without it ``_impl()``
    # below would fail with a missing-argument TypeError.
    @st.composite
    def _impl(draw: st.DrawFn) -> dict[str, float]:
        raw = [
            draw(st.floats(min_value=0.01, max_value=1.0, allow_nan=False, allow_infinity=False))
            for _ in langs
        ]
        total = sum(raw)
        return {lang: r / total for lang, r in zip(langs, raw, strict=True)}

    return _impl()


# NOTE(review): the original @given/@settings arguments were lost. The ranges
# below are reconstructed (non-negative seeds, the three stages used elsewhere
# in this module, no per-example deadline) — confirm against the test plan.
_SEEDS = st.integers(min_value=0, max_value=2**32 - 1)
_STAGES = st.sampled_from((1, 2, 3))
_PROPERTY_SETTINGS = settings(max_examples=100, deadline=None)


@_PROPERTY_SETTINGS
@given(seed=_SEEDS, stage=_STAGES, weights=_language_weights_strategy())
def test_generate_is_pure(seed: int, stage: int, weights: dict[str, float]) -> None:  # P1
    """Two calls with identical arguments return equal specs."""
    a = generate(seed, stage, weights)  # type: ignore[arg-type]
    b = generate(seed, stage, weights)  # type: ignore[arg-type]
    assert a == b
    assert a.seed_utterance == b.seed_utterance


def test_procedural_space_uniqueness_scan() -> None:  # P2 (scaled down — slow)
    """At least 80% of 5,000 consecutive seeds yield distinct utterances."""
    weights = {"en": 0.2, "hi": 0.2, "ta": 0.2, "kn": 0.2, "hinglish": 0.2}
    # Walk 5,000 distinct seeds (200k is gated behind -m slow in CI nightly).
    utterances = {generate(s, 3, weights).seed_utterance for s in range(5_000)}
    # The threshold tolerates up to 20% collisions at n=5k.
    assert len(utterances) >= 5_000 * 0.8


def test_language_distribution_chi_square_n10000() -> None:  # P3
    """Chi-square goodness of fit of sampled languages against the weights."""
    weights = {"en": 0.3, "hi": 0.3, "ta": 0.2, "kn": 0.1, "hinglish": 0.1}
    n = 10_000
    observed = Counter(generate(s, 3, weights).language for s in range(n))
    expected = {lang: p * n for lang, p in weights.items()}
    chi2 = sum(
        ((observed.get(lang, 0) - expected[lang]) ** 2) / expected[lang]
        for lang in weights
    )
    # df=4, alpha=0.001 critical value ≈ 18.47
    assert chi2 < 18.47, f"chi-square {chi2:.2f} rejects null"


@_PROPERTY_SETTINGS
@given(seed=_SEEDS)
def test_stage_template_pool_monotone(seed: int) -> None:  # P4
    """Stage-1 output never carries more than two constraints, for any seed."""
    weights = {"en": 1.0, "hi": 0.0, "ta": 0.0, "kn": 0.0, "hinglish": 0.0}
    g1 = generate(seed, 1, weights)
    assert len(g1.constraints) <= 2


@_PROPERTY_SETTINGS
@given(seed=_SEEDS, stage=_STAGES, weights=_language_weights_strategy())
def test_seed_utterance_always_nfc(
    seed: int, stage: int, weights: dict[str, float]
) -> None:  # P5
    """The utterance and every string slot value are NFC-normalized."""
    g = generate(seed, stage, weights)  # type: ignore[arg-type]
    assert unicodedata.is_normalized("NFC", g.seed_utterance)
    for v in g.slots.values():
        if isinstance(v, str):
            assert unicodedata.is_normalized("NFC", v)


@_PROPERTY_SETTINGS
@given(seed=_SEEDS, stage=_STAGES)
def test_budget_within_declared_range(seed: int, stage: int) -> None:  # P6
    """A generated budget lies inside some template's declared [low, high]."""
    weights = {"en": 1.0, "hi": 0.0, "ta": 0.0, "kn": 0.0, "hinglish": 0.0}
    g = generate(seed, stage, weights)  # type: ignore[arg-type]
    if "budget_inr" not in g.constraints:
        return
    # Find any template in the library whose budget range could contain it.
    lib = tg._get_library()
    match = False
    for t in lib.templates:
        if "budget_inr" not in t.constraints_template:
            continue
        dist = t.constraints_template["budget_inr"]
        assert dist.low is not None and dist.high is not None
        if dist.low <= g.constraints["budget_inr"] <= dist.high:
            match = True
            break
    assert match, (g.constraints, g.domain)
| # --------------------------------------------------------------------------- | |
| # §3 Integration tests (I1–I5) — use real fixture files written on disk | |
| # --------------------------------------------------------------------------- | |
def _write_fixture_library(tmp_path: Path) -> None:
    """Author a minimal real templates.yaml + i18n.yaml pair in *tmp_path*.

    Writes five templates covering four domains (two airline, one each for
    cab, restaurant and hotel), each with one utterance variant per language,
    followed by a small i18n table. Both files are UTF-8 YAML dumped with
    ``allow_unicode=True`` and ``sort_keys=False``.
    """

    # Each helper builds a new object on every call, mirroring the separate
    # literals a hand-written structure would have (PyYAML would emit
    # anchors/aliases for a shared list or dict).
    def uniform(low: int, high: int, step: int) -> dict[str, Any]:
        return {"distribution": "uniform", "low": low, "high": high, "step": step}

    def one_of(*options: str) -> dict[str, Any]:
        return {"choices": list(options)}

    def of_kind(kind: str) -> dict[str, Any]:
        return {"distribution": kind}

    def time_windows() -> dict[str, Any]:
        return one_of("morning", "afternoon", "evening", "late_night")

    def entry(
        template_id: str,
        domain: str,
        intent: str,
        min_stage: int,
        required: list[str],
        slots: dict[str, Any],
        constraints: dict[str, Any],
        drift_tags: list[str],
        variants: dict[str, list[str]],
    ) -> dict[str, Any]:
        # Key order is significant because the dump uses sort_keys=False.
        return {
            "template_id": template_id,
            "domain": domain,
            "intent": intent,
            "min_stage": min_stage,
            "required_slots": required,
            "optional_slots": [],
            "slot_distributions": slots,
            "constraints_template": constraints,
            "drift_slot_tags": drift_tags,
            "language_variants": variants,
        }

    def dump(filename: str, payload: Any) -> None:
        text = yaml.safe_dump(payload, allow_unicode=True, sort_keys=False)
        (tmp_path / filename).write_text(text, encoding="utf-8")

    airline_basic = entry(
        "airline.book.fixture_v1",
        "airline",
        "book_flight",
        1,
        ["from", "to", "when"],
        {
            "from": one_of("HYD", "BLR", "DEL", "BOM", "MAA"),
            "to": one_of("HYD", "BLR", "DEL", "BOM", "MAA"),
            "when": of_kind("date"),
        },
        {
            "budget_inr": uniform(3000, 15000, 500),
            "time_window": time_windows(),
        },
        ["price", "total_fare_inr"],
        {
            "hinglish": [
                "Bhai {when} ko {from} se {to}, {budget_inr} rupees max, {time_window}"
            ],
            "hi": [
                "{when} को {from} से {to}, ₹{budget_inr} से कम, {time_window}"
            ],
            "ta": [
                "{when} அன்று {from} லிருந்து {to}, ₹{budget_inr} கீழ், {time_window}"
            ],
            "kn": [
                "{when} ರಂದು {from} ಇಂದ {to}, ₹{budget_inr} ಒಳಗೆ, {time_window}"
            ],
            "en": [
                "Flight from {from} to {to} on {when}, under ₹{budget_inr}, {time_window}"
            ],
        },
    )

    cab = entry(
        "cab.ride.fixture_v1",
        "cab",
        "book_cab",
        1,
        ["pickup", "drop", "when"],
        {
            "pickup": one_of("Koramangala", "Indiranagar", "Whitefield"),
            "drop": one_of("Koramangala", "Indiranagar", "Whitefield"),
            "when": of_kind("date"),
        },
        {"budget_inr": uniform(200, 2000, 50)},
        ["fare_inr"],
        {
            "hinglish": ["{when} ko {pickup} se {drop} cab, {budget_inr} ke andar"],
            "hi": ["{when} को {pickup} से {drop}, ₹{budget_inr} के अंदर"],
            "ta": ["{when} அன்று {pickup} லிருந்து {drop}, ₹{budget_inr} கீழ்"],
            "kn": ["{when} ರಂದು {pickup} ಇಂದ {drop}, ₹{budget_inr} ಒಳಗೆ"],
            "en": ["Cab {pickup} to {drop} on {when}, under ₹{budget_inr}"],
        },
    )

    restaurant = entry(
        "restaurant.order.fixture_v1",
        "restaurant",
        "order_food",
        1,
        ["city", "cuisine", "when"],
        {
            "city": one_of("HYD", "BLR", "DEL"),
            "cuisine": one_of("Biryani", "Dosa", "Pizza"),
            "when": of_kind("date"),
        },
        {
            "budget_inr": uniform(200, 1000, 50),
            "veg_only": of_kind("bool"),
        },
        ["min_order"],
        {
            "hinglish": [
                "{when} ko {city} mein {cuisine}, {budget_inr} max, veg={veg_only}"
            ],
            "hi": [
                "{when} को {city} में {cuisine}, ₹{budget_inr}, veg={veg_only}"
            ],
            "ta": [
                "{when} அன்று {city} இல் {cuisine}, ₹{budget_inr}, veg={veg_only}"
            ],
            "kn": [
                "{when} ರಂದು {city} ನಲ್ಲಿ {cuisine}, ₹{budget_inr}, veg={veg_only}"
            ],
            "en": [
                "Order {cuisine} in {city} on {when}, ₹{budget_inr}, veg={veg_only}"
            ],
        },
    )

    hotel = entry(
        "hotel.book.fixture_v1",
        "hotel",
        "book_hotel",
        1,
        ["city", "checkin", "checkout"],
        {
            "city": one_of("HYD", "BLR", "GOI"),
            "checkin": of_kind("date"),
            "checkout": of_kind("date"),
        },
        {"budget_inr": uniform(2000, 10000, 500)},
        ["cancel_window"],
        {
            "hinglish": ["{city} {checkin}-{checkout}, ₹{budget_inr}/night"],
            "hi": ["{city} {checkin}-{checkout}, ₹{budget_inr} प्रति रात"],
            "ta": ["{city} {checkin}-{checkout}, ₹{budget_inr} இரவுக்கு"],
            "kn": ["{city} {checkin}-{checkout}, ₹{budget_inr} ಒಂದು ರಾತ್ರಿ"],
            "en": ["{city} {checkin} to {checkout}, ₹{budget_inr} per night"],
        },
    )

    # The only template with min_stage 3 and three constraints.
    airline_compound = entry(
        "airline.book.compound_v1",
        "airline",
        "book_flight",
        3,
        ["from", "to", "when"],
        {
            "from": one_of("HYD", "BLR", "DEL"),
            "to": one_of("HYD", "BLR", "DEL"),
            "when": of_kind("date"),
        },
        {
            "budget_inr": uniform(3000, 15000, 500),
            "time_window": time_windows(),
            "passenger_count": uniform(1, 4, 1),
        },
        ["price", "passenger_count"],
        {
            "hinglish": [
                "{when} ko {from} se {to}, {passenger_count} log, ₹{budget_inr}, {time_window}"
            ],
            "hi": [
                "{when} को {from} से {to}, {passenger_count} लोग, ₹{budget_inr}, {time_window}"
            ],
            "ta": [
                "{when} அன்று {from} லிருந்து {to}, {passenger_count} பேர், ₹{budget_inr}, {time_window}"
            ],
            "kn": [
                "{when} ರಂದು {from} ಇಂದ {to}, {passenger_count} ಜನ, ₹{budget_inr}, {time_window}"
            ],
            "en": [
                "{from} to {to} on {when} for {passenger_count} pax, ₹{budget_inr}, {time_window}"
            ],
        },
    )

    templates: list[dict[str, Any]] = [
        airline_basic,
        cab,
        restaurant,
        hotel,
        airline_compound,
    ]
    dump("templates.yaml", templates)

    i18n: dict[str, Any] = {
        "hi": {
            "cities": {"BLR": "बेंगलुरु", "MAA": "चेन्नई", "HYD": "हैदराबाद"},
            "weekdays": {"monday": "सोमवार"},
        },
        "ta": {
            "cities": {"BLR": "பெங்களூரு", "MAA": "சென்னை"},
            "weekdays": {"monday": "திங்கட்கிழமை"},
        },
        "kn": {
            "cities": {"BLR": "ಬೆಂಗಳೂರು", "MAA": "ಚೆನ್ನೈ"},
            "weekdays": {"monday": "ಸೋಮವಾರ"},
        },
        "en": {"cities": {"BLR": "Bengaluru"}},
        "hinglish": {"cities": {"BLR": "Bengaluru"}},
    }
    dump("i18n.yaml", i18n)
| def _valid_goal_spec(g: GoalSpec) -> None: | |
| assert dataclasses.is_dataclass(g) | |
| assert g.domain in ("airline", "cab", "restaurant", "hotel") | |
| assert g.language in ("hi", "ta", "kn", "en", "hinglish") | |
| assert unicodedata.is_normalized("NFC", g.seed_utterance) | |
| assert len(g.seed_utterance) <= 280 | |
| assert re.search(r"\{[a-z_][a-z0-9_]*\}", g.seed_utterance) is None | |
class TestIntegration:
    """I1–I5: end-to-end checks against fixture files written to disk."""

    @staticmethod
    def _fixture_library(tmp_path: Path) -> TemplateLibrary:
        """Write the fixture YAML pair into *tmp_path* and load it."""
        _write_fixture_library(tmp_path)
        return load_templates(tmp_path / "templates.yaml")

    def test_load_templates_from_fixture(self, tmp_path: Path) -> None:  # I1
        lib = self._fixture_library(tmp_path)
        assert isinstance(lib, TemplateLibrary)
        # Five templates spread over four domains (airline appears twice).
        assert len({t.domain for t in lib.templates}) == 4
        assert len(lib.templates) == 5
        for lang in ("hi", "ta", "kn", "en", "hinglish"):
            assert lang in lib.i18n

    def test_100_briefs_pass_goal_spec_invariants(self, tmp_path: Path) -> None:  # I2
        tg.set_library_override(self._fixture_library(tmp_path))
        weights = {"en": 0.3, "hi": 0.3, "ta": 0.2, "kn": 0.1, "hinglish": 0.1}
        for s in range(100):
            _valid_goal_spec(generate(s, 3, weights))

    def test_enumerate_variants_stable_order(self, tmp_path: Path) -> None:  # I3
        tg.set_library_override(self._fixture_library(tmp_path))
        weights = {"en": 0.2, "hi": 0.2, "ta": 0.2, "kn": 0.2, "hinglish": 0.2}
        a = list(enumerate_variants(limit=200, stage=3, language_weights=weights))
        b = list(enumerate_variants(limit=200, stage=3, language_weights=weights))
        assert [g.seed_utterance for g in a] == [g.seed_utterance for g in b]

    # NOTE(review): the original parameter table was lost. Rows below pair each
    # Indic language with its own Unicode block (Devanagari U+0900–097F,
    # Tamil U+0B80–0BFF, Kannada U+0C80–0CFF) and one foreign block that must
    # not appear — confirm against the test plan.
    @pytest.mark.parametrize(
        ("lang", "expected_block", "forbidden_block"),
        [
            ("hi", (0x0900, 0x097F), (0x0B80, 0x0BFF)),
            ("ta", (0x0B80, 0x0BFF), (0x0900, 0x097F)),
            ("kn", (0x0C80, 0x0CFF), (0x0900, 0x097F)),
        ],
    )
    def test_indic_script_isolation(
        self,
        tmp_path: Path,
        lang: str,
        expected_block: tuple[int, int],
        forbidden_block: tuple[int, int],
    ) -> None:  # I4
        tg.set_library_override(self._fixture_library(tmp_path))
        # All probability mass on the language under test.
        weights = {c: (1.0 if c == lang else 0.0) for c in ("hi", "ta", "kn", "en", "hinglish")}
        lo, hi = expected_block
        fo, fh = forbidden_block
        for s in range(50):
            g = generate(s, 2, weights)
            assert any(lo <= ord(c) <= hi for c in g.seed_utterance), g.seed_utterance
            assert not any(fo <= ord(c) <= fh for c in g.seed_utterance), g.seed_utterance

    def test_hinglish_never_contains_devanagari(self, tmp_path: Path) -> None:  # I5
        tg.set_library_override(self._fixture_library(tmp_path))
        weights = {"hinglish": 1.0, "hi": 0.0, "ta": 0.0, "kn": 0.0, "en": 0.0}
        for s in range(100):
            g = generate(s, 3, weights)
            # U+0900–U+097F is the Devanagari block.
            assert not any(0x0900 <= ord(c) <= 0x097F for c in g.seed_utterance)