driftcall / tests /test_step_07_task_generator.py
saumilyajj's picture
Upload folder using huggingface_hub
f2df60e verified
"""Tests for cells/step_07_task_generator.py.
Implements docs/tests/task_generator_tests.md:
- 36 unit tests (U1–U30, U34–U39)
- 6 hypothesis property tests (P1–P6)
- 5 integration tests (I1–I5)
"""
from __future__ import annotations
import dataclasses
import hashlib
import re
import unicodedata
from collections import Counter
from math import sqrt
from typing import TYPE_CHECKING, Any
import pytest
import yaml
from hypothesis import given, settings
from hypothesis import strategies as st
from cells import step_07_task_generator as tg
from cells.step_07_task_generator import (
InvalidBudgetError,
InvalidLanguageError,
InvalidLanguageWeightError,
InvalidStageError,
MissingSlotError,
NoVariantForLanguageError,
SlotDistribution,
Template,
TemplateFileMissingError,
TemplateLibrary,
TemplateSchemaError,
UnicodeNormalizationError,
enumerate_variants,
generate,
load_templates,
stable_sub_seed,
)
if TYPE_CHECKING:
from pathlib import Path
from cells.step_04_models import GoalSpec
# ---------------------------------------------------------------------------
# Shared fixtures / weight constants (§5.3 of the test plan)
# ---------------------------------------------------------------------------
# Stage 1 mix: Tamil and Kannada are listed but carry zero weight.
STAGE_1_WEIGHTS: dict[str, float] = dict(
    en=0.50,
    hi=0.30,
    hinglish=0.20,
    ta=0.00,
    kn=0.00,
)

# Stage 2 mix: all five languages carry non-zero weight.
STAGE_2_WEIGHTS: dict[str, float] = dict(
    en=0.30,
    hi=0.30,
    hinglish=0.20,
    ta=0.10,
    kn=0.10,
)

# Stage 3 mix: same values as stage 2, kept as an independent dict object.
STAGE_3_WEIGHTS: dict[str, float] = dict(
    en=0.30,
    hi=0.30,
    hinglish=0.20,
    ta=0.10,
    kn=0.10,
)
@pytest.fixture(autouse=True)
def _install_test_library(tmp_path_factory: pytest.TempPathFactory) -> Any:
    """Give every test a freshly loaded fixture library as the default.

    The override and cache are cleared, a fixture ``templates.yaml`` /
    ``i18n.yaml`` pair is written to a new temp directory, loaded, and
    installed as the library override. After the test the override and
    cache are cleared again.

    A test that wants the production ``data/task_briefs/templates.yaml`` or
    its own library must call ``tg.set_library_override()`` in the test body;
    this fixture only establishes the default.
    """

    def _clear_library_state() -> None:
        tg.set_library_override(None)
        tg.reset_library_cache()

    _clear_library_state()
    root = tmp_path_factory.mktemp("task_gen_fixture")
    _write_fixture_library(root)
    tg.set_library_override(load_templates(root / "templates.yaml"))
    yield
    _clear_library_state()
# ---------------------------------------------------------------------------
# §1.1 Determinism (U1–U5)
# ---------------------------------------------------------------------------
@pytest.mark.unit
class TestDeterminism:
    """U1–U5: repeated ``generate`` calls with equal arguments agree."""

    def test_generate_same_seed_same_goalspec(self) -> None:  # U1
        """100 calls with the same arguments yield equal goal specs."""
        baseline = generate(42, 1, STAGE_1_WEIGHTS)
        repeats = [generate(42, 1, STAGE_1_WEIGHTS) for _ in range(99)]
        assert repeats == [baseline] * 99

    def test_generate_byte_identical_seed_utterance_after_nfc(self) -> None:  # U2
        """The UTF-8 encoding of the seed utterance is stable across calls."""

        def _utterance_bytes() -> bytes:
            return generate(42, 1, STAGE_1_WEIGHTS).seed_utterance.encode("utf-8")

        reference = _utterance_bytes()
        for _ in range(99):
            assert _utterance_bytes() == reference

    def test_generate_different_seeds_different_episodes(self) -> None:  # U3
        """Seeds 0..99 at stage 3 produce more than 90 distinct utterances."""
        distinct = {
            generate(seed, 3, STAGE_3_WEIGHTS).seed_utterance for seed in range(100)
        }
        assert len(distinct) > 90

    def test_generate_stage_changes_template_pool(self) -> None:  # U4
        """Stage 1 and stage 3 goals respect their constraint-count caps."""
        stage_one = generate(42, 1, STAGE_3_WEIGHTS)
        stage_three = generate(42, 3, STAGE_3_WEIGHTS)
        assert len(stage_one.constraints) <= 2
        assert len(stage_three.constraints) <= 4

    def test_generate_returns_frozen_goalspec(self) -> None:  # U5
        """The returned object is a frozen dataclass instance."""
        spec = generate(42, 1, STAGE_1_WEIGHTS)
        assert dataclasses.is_dataclass(spec)
        assert spec.__dataclass_params__.frozen is True  # type: ignore[attr-defined]
# ---------------------------------------------------------------------------
# §1.2 Stage-aware constraint counts (U6–U8)
# ---------------------------------------------------------------------------
@pytest.mark.unit
class TestStageConstraintCounts:
    """U6–U8: each stage caps the number of constraints on a goal."""

    @staticmethod
    def _assert_constraint_cap(stage: int, weights: dict[str, float], cap: int) -> None:
        """Check seeds 0..199 at ``stage`` never exceed ``cap`` constraints."""
        for seed in range(200):
            spec = generate(seed, stage, weights)
            assert len(spec.constraints) <= cap, (seed, spec.constraints)

    def test_stage_1_constraint_count_leq_2(self) -> None:  # U6
        self._assert_constraint_cap(1, STAGE_1_WEIGHTS, 2)

    def test_stage_2_constraint_count_leq_3(self) -> None:  # U7
        self._assert_constraint_cap(2, STAGE_2_WEIGHTS, 3)

    def test_stage_3_constraint_count_leq_4(self) -> None:  # U8
        self._assert_constraint_cap(3, STAGE_3_WEIGHTS, 4)
# ---------------------------------------------------------------------------
# §1.3 Language-weight distribution (U9, U10)
# ---------------------------------------------------------------------------
@pytest.mark.unit
class TestLanguageWeightDistribution:
    """U9–U10: sampled languages follow the supplied weights."""

    def test_language_weights_sampled_distribution_matches_at_n1000(self) -> None:  # U9
        """Observed language shares over 1000 seeds stay within 3 sigma."""
        target = {"en": 0.3, "hi": 0.3, "ta": 0.2, "kn": 0.1, "hinglish": 0.1}
        draws = 1000
        tally = Counter(generate(seed, 3, target).language for seed in range(draws))
        for lang, prob in target.items():
            share = tally.get(lang, 0) / draws
            # Binomial standard deviation of the observed share; the 3-sigma
            # band keeps the check stable while still catching sampling bugs.
            sigma = sqrt(prob * (1 - prob) / draws)
            assert abs(share - prob) < 3 * sigma + 1e-6, (lang, share, prob)

    def test_language_weights_zero_keys_never_drawn(self) -> None:  # U10
        """With all mass on English, every one of 500 seeds yields English."""
        en_only = {"en": 1.0, "hi": 0.0, "ta": 0.0, "kn": 0.0, "hinglish": 0.0}
        drawn = {generate(seed, 3, en_only).language for seed in range(500)}
        assert drawn == {"en"}
# ---------------------------------------------------------------------------
# §1.4 Validation exceptions (U11–U19)
# ---------------------------------------------------------------------------
@pytest.mark.unit
class TestValidationExceptions:
    """U11–U19: invalid inputs raise the matching typed exception."""

    def test_invalid_language_error_on_unsupported_key(self) -> None:  # U11
        unsupported = {"hindi": 1.0}
        with pytest.raises(InvalidLanguageError):
            generate(0, 1, unsupported)  # type: ignore[dict-item]

    def test_invalid_language_error_on_marathi_key(self) -> None:  # U12
        # The error message must name the offending key.
        with_marathi = {"en": 0.5, "marathi": 0.5}
        with pytest.raises(InvalidLanguageError, match="marathi"):
            generate(0, 1, with_marathi)  # type: ignore[dict-item]

    def test_invalid_language_weight_error_empty_dict(self) -> None:  # U13
        with pytest.raises(InvalidLanguageWeightError):
            generate(0, 1, {})

    def test_invalid_language_weight_error_negative_value(self) -> None:  # U14
        # Sums to 1.0, so only the negative entry can be the reason to reject.
        with_negative = {"en": 1.5, "hi": -0.5}
        with pytest.raises(InvalidLanguageWeightError):
            generate(0, 1, with_negative)

    def test_invalid_language_weight_error_sum_mismatch_low(self) -> None:  # U15
        sums_to_0_8 = {"en": 0.5, "hi": 0.3}
        with pytest.raises(InvalidLanguageWeightError):
            generate(0, 1, sums_to_0_8)

    def test_invalid_language_weight_error_sum_mismatch_high(self) -> None:  # U16
        sums_to_1_2 = {"en": 0.7, "hi": 0.5}
        with pytest.raises(InvalidLanguageWeightError):
            generate(0, 1, sums_to_1_2)

    def test_invalid_language_weight_error_all_zero(self) -> None:  # U17
        # Calls the private validator directly with an all-zero mapping.
        # NOTE(review): the intent is to cover the validator's defensive
        # all-zero check, but an all-zero mapping also sums to 0, so the
        # sum-mismatch check may be what actually raises — confirm against
        # the validator implementation.
        all_zero = {"en": 0.0, "hi": 0.0, "ta": 0.0, "kn": 0.0, "hinglish": 0.0}
        with pytest.raises(InvalidLanguageWeightError):
            tg._validate_language_weights(all_zero)

    @pytest.mark.parametrize("bad_stage", [0, 4, -1])
    def test_invalid_stage_error(self, bad_stage: int) -> None:  # U18
        with pytest.raises(InvalidStageError):
            generate(0, bad_stage, STAGE_1_WEIGHTS)  # type: ignore[arg-type]

    def test_template_file_missing_error(self, tmp_path: Path) -> None:  # U19
        absent = tmp_path / "does_not_exist.yaml"
        with pytest.raises(TemplateFileMissingError):
            load_templates(absent)
# ---------------------------------------------------------------------------
# §1.5 Unicode NFC (U20–U24)
# ---------------------------------------------------------------------------
def _single_lang_weights(code: str) -> dict[str, float]:
    """Return language weights with all probability mass on ``code``.

    The five known language keys start at 0.0 and ``code`` is then set to
    1.0 (a ``code`` outside the five is added as an extra key).
    """
    weights = dict.fromkeys(("en", "hi", "ta", "kn", "hinglish"), 0.0)
    weights[code] = 1.0
    return weights
@pytest.mark.unit
class TestNFC:
    """U20–U24: every generated or loaded string is NFC-normalized."""

    def test_seed_utterance_is_nfc_for_every_language(self) -> None:  # U20
        """A goal forced into each language has an NFC seed utterance."""
        for code in ("hi", "ta", "kn", "en", "hinglish"):
            g = generate(7, 2, _single_lang_weights(code))
            assert unicodedata.is_normalized("NFC", g.seed_utterance)

    def test_slotgrid_string_values_are_nfc(self) -> None:  # U21
        """String-valued slots of 50 stage-3 goals are NFC."""
        weights = {"en": 0.3, "hi": 0.3, "ta": 0.2, "kn": 0.1, "hinglish": 0.1}
        for s in range(50):
            g = generate(s, 3, weights)
            for v in g.slots.values():
                if isinstance(v, str):
                    assert unicodedata.is_normalized("NFC", v), (s, v)

    def test_i18n_yaml_loaded_values_are_nfc(self, tmp_path: Path) -> None:  # U22
        """Every value in each loaded i18n block is NFC."""
        _write_fixture_library(tmp_path)
        lib = load_templates(tmp_path / "templates.yaml")
        for block in lib.i18n.values():
            for v in block.values():
                assert unicodedata.is_normalized("NFC", v)

    def test_templates_yaml_variant_strings_are_nfc_post_load(
        self, tmp_path: Path
    ) -> None:  # U23
        """Every language-variant string of every loaded template is NFC."""
        _write_fixture_library(tmp_path)
        lib = load_templates(tmp_path / "templates.yaml")
        for t in lib.templates:
            for variants in t.language_variants.values():
                for v in variants:
                    assert unicodedata.is_normalized("NFC", v)

    def test_nfd_input_renormalized_to_nfc_on_load(self, tmp_path: Path) -> None:  # U24
        """An i18n file containing NFD text loads with NFC values.

        The i18n file next to the fixture ``templates.yaml`` is overwritten
        with city names that include a genuinely decomposed (NFD) string
        before the library is loaded.
        """
        _write_fixture_library(tmp_path)
        # "ಬೆಂಗಳೂರು" and "बेंगलुरु" contain no canonically decomposable code
        # points, so their NFD form is identical to NFC and cannot exercise
        # renormalization on its own. They are kept as in the original data.
        nfd_kannada = unicodedata.normalize("NFD", "ಬೆಂಗಳೂರು")
        # "ಚೆನ್ನೈ" ends in KANNADA VOWEL SIGN AI (U+0CC8), which decomposes
        # canonically to U+0CC6 U+0CD6, so its NFD form really differs.
        nfd_kannada_maa = unicodedata.normalize("NFD", "ಚೆನ್ನೈ")
        # Precondition: the input written to disk must actually be non-NFC,
        # otherwise this test would pass without testing anything.
        assert not unicodedata.is_normalized("NFC", nfd_kannada_maa)
        yaml_path = tmp_path / "i18n.yaml"
        data = {
            "hi": {"cities": {"BLR": unicodedata.normalize("NFD", "बेंगलुरु")}},
            "ta": {"cities": {"BLR": "பெங்களூரு"}},
            "kn": {"cities": {"BLR": nfd_kannada, "MAA": nfd_kannada_maa}},
            "en": {"cities": {"BLR": "Bengaluru"}},
            "hinglish": {"cities": {"BLR": "Bengaluru"}},
        }
        yaml_path.write_text(yaml.safe_dump(data, allow_unicode=True), encoding="utf-8")
        lib = load_templates(tmp_path / "templates.yaml")
        for block in lib.i18n.values():
            for v in block.values():
                assert unicodedata.is_normalized("NFC", v)
# ---------------------------------------------------------------------------
# §1.6 stable_sub_seed domain separation (U25–U28)
# ---------------------------------------------------------------------------
@pytest.mark.unit
class TestSubSeed:
    """U25–U28: ``stable_sub_seed`` derives per-decision seeds from a tag."""

    def test_stable_sub_seed_formula(self) -> None:  # U25
        """The sub-seed is the big-endian 8-byte BLAKE2b digest of ``b"42:domain"``."""
        digest = hashlib.blake2b(b"42:domain", digest_size=8).digest()
        assert stable_sub_seed(42, "domain") == int.from_bytes(digest, "big")

    def test_sub_seed_tags_differ_per_decision(self) -> None:  # U26
        """Five different tags under one seed give five different sub-seeds."""
        sub_seeds = {
            stable_sub_seed(42, tag)
            for tag in ["domain", "template", "slots", "language", "variant"]
        }
        assert len(sub_seeds) == 5

    def test_sub_seed_stable_across_runs(self) -> None:  # U27
        """Two calls with the same arguments return the same value."""
        first = stable_sub_seed(42, "domain")
        second = stable_sub_seed(42, "domain")
        assert first == second

    def test_sub_seed_different_seed_different_output(self) -> None:  # U28
        """Changing the seed changes the sub-seed for the same tag."""
        assert stable_sub_seed(42, "domain") != stable_sub_seed(43, "domain")
# ---------------------------------------------------------------------------
# §1.7 Structural invariants (U29, U30)
# ---------------------------------------------------------------------------
@pytest.mark.unit
class TestStructuralInvariants:
    """U29–U30: shape constraints on the rendered seed utterance."""

    @staticmethod
    def _stage_3_utterances() -> list[tuple[int, str]]:
        """Return ``(seed, seed_utterance)`` for seeds 0..99 at stage 3."""
        weights = {"en": 0.3, "hi": 0.3, "ta": 0.2, "kn": 0.1, "hinglish": 0.1}
        return [(seed, generate(seed, 3, weights).seed_utterance) for seed in range(100)]

    def test_seed_utterance_has_no_unresolved_placeholders(self) -> None:  # U29
        """No ``{slot_name}`` placeholder survives rendering."""
        for seed, utterance in self._stage_3_utterances():
            leftover = re.search(r"\{[a-z_][a-z0-9_]*\}", utterance)
            assert leftover is None, (
                seed,
                utterance,
            )

    def test_seed_utterance_length_leq_280(self) -> None:  # U30
        """Utterances are at most 280 characters long."""
        for _seed, utterance in self._stage_3_utterances():
            assert len(utterance) <= 280
# ---------------------------------------------------------------------------
# §1.8 Malformed-fixture raise-site tests (U34–U39)
# ---------------------------------------------------------------------------
@pytest.mark.unit
class TestErrorModes:
    """U34–U39: malformed fixtures hit the intended raise sites."""

    @staticmethod
    def _single_template_library(
        template_id: str, language_variants: dict[str, tuple[str, ...]]
    ) -> TemplateLibrary:
        """Build a one-template airline library directly, bypassing the loader.

        Only ``template_id`` and ``language_variants`` vary between the tests
        that use this; constructing the objects by hand skips whatever checks
        ``load_templates`` performs on files.
        """
        template = Template(
            template_id=template_id,
            domain="airline",
            intent="book_flight",
            min_stage=1,
            required_slots=("from", "to", "when"),
            optional_slots=(),
            slot_distributions={
                "from": SlotDistribution(kind="choices", choices=("HYD",)),
                "to": SlotDistribution(kind="choices", choices=("BLR",)),
                "when": SlotDistribution(kind="date"),
            },
            constraints_template={},
            drift_slot_tags=(),
            language_variants=language_variants,
        )
        return TemplateLibrary(
            templates=(template,),
            cities_by_domain={"airline": ("HYD", "BLR")},
            i18n={k: {} for k in ("hi", "ta", "kn", "en", "hinglish")},
        )

    def test_missing_slot_error(self) -> None:  # U34
        # Every variant references a placeholder that is not a declared slot.
        undeclared = "go to {destination}"
        library = self._single_template_library(
            "airline.bad",
            {
                "en": (undeclared,),
                "hi": (undeclared,),
                "ta": (undeclared,),
                "kn": (undeclared,),
                "hinglish": (undeclared,),
            },
        )
        tg.set_library_override(library)
        with pytest.raises(MissingSlotError, match="destination"):
            generate(0, 1, {"en": 1.0, "hi": 0.0, "ta": 0.0, "kn": 0.0, "hinglish": 0.0})

    def test_invalid_budget_error_from_step_misalignment(self) -> None:  # U35
        # Drive _sample_slot_value with an RNG stub whose randint result
        # lands past the declared upper bound.
        import random

        class _BadRng(random.Random):
            def randint(self, a: int, b: int) -> int:  # noqa: ARG002
                return 3  # 100 + 3*70 = 310 > 250

        misaligned = SlotDistribution(kind="uniform", low=100.0, high=250.0, step=70.0)
        with pytest.raises(InvalidBudgetError):
            tg._sample_slot_value(_BadRng(0), "budget_inr", misaligned, template_id="x")

    def test_template_schema_error_missing_required_key(self, tmp_path: Path) -> None:  # U36
        # A template entry carrying only ``template_id`` must be rejected.
        target = tmp_path / "templates.yaml"
        target.write_text(yaml.safe_dump([{"template_id": "x"}]), encoding="utf-8")
        with pytest.raises(TemplateSchemaError):
            load_templates(target)

    def test_template_schema_error_bad_step_grid(self, tmp_path: Path) -> None:  # U37
        # (15000 - 3000) is not a multiple of step 700.
        bad_template: dict[str, Any] = {
            "template_id": "airline.bad",
            "domain": "airline",
            "intent": "book_flight",
            "min_stage": 1,
            "required_slots": [],
            "optional_slots": [],
            "constraints_template": {
                "budget_inr": {"distribution": "uniform", "low": 3000, "high": 15000, "step": 700}
            },
            "drift_slot_tags": [],
            "language_variants": {
                "en": ["hello"],
                "hi": ["नमस्ते"],
                "ta": ["வணக்கம்"],
                "kn": ["ನಮಸ್ಕಾರ"],
                "hinglish": ["namaste"],
            },
        }
        target = tmp_path / "templates.yaml"
        target.write_text(
            yaml.safe_dump([bad_template], allow_unicode=True), encoding="utf-8"
        )
        with pytest.raises(TemplateSchemaError, match="misaligned"):
            load_templates(target)

    def test_unicode_normalization_error_defensive(self, monkeypatch: pytest.MonkeyPatch) -> None:  # U38
        # Force the normalization check to report failure; ``monkeypatch``
        # restores the real function after the test.
        monkeypatch.setattr(tg.unicodedata, "is_normalized", lambda *a, **k: False)
        with pytest.raises(UnicodeNormalizationError):
            tg._assert_nfc("anything", where="test")

    def test_no_variant_for_language_error(self) -> None:  # U39
        # Tamil has an empty variant tuple and all sampling weight.
        library = self._single_template_library(
            "airline.missing_ta",
            {
                "en": ("from {from} to {to} on {when}",),
                "hi": ("{from} से {to} {when}",),
                "ta": (),  # intentionally empty
                "kn": ("{from} {to} {when}",),
                "hinglish": ("{from} to {to} {when}",),
            },
        )
        tg.set_library_override(library)
        tamil_only = {"en": 0.0, "hi": 0.0, "ta": 1.0, "kn": 0.0, "hinglish": 0.0}
        with pytest.raises(NoVariantForLanguageError):
            generate(0, 1, tamil_only)
# ---------------------------------------------------------------------------
# §2 Property tests (P1–P6)
# ---------------------------------------------------------------------------
def _language_weights_strategy() -> st.SearchStrategy[dict[str, float]]:
    """Hypothesis strategy producing normalized weights over five languages.

    One float in [0.01, 1.0] is drawn per language and each is divided by
    the total, so every language receives a strictly positive weight.
    """
    langs = ("hi", "ta", "kn", "en", "hinglish")
    positive_float = st.floats(
        min_value=0.01, max_value=1.0, allow_nan=False, allow_infinity=False
    )

    @st.composite
    def _normalized(draw: st.DrawFn) -> dict[str, float]:
        raw = [draw(positive_float) for _ in langs]
        total = sum(raw)
        shares = [value / total for value in raw]
        return dict(zip(langs, shares, strict=True))

    return _normalized()
@pytest.mark.property
@given(
    seed=st.integers(min_value=0, max_value=2**62),
    stage=st.sampled_from([1, 2, 3]),
    weights=_language_weights_strategy(),
)
@settings(max_examples=150, deadline=None)
def test_generate_is_pure(seed: int, stage: int, weights: dict[str, float]) -> None:  # P1
    """Two calls with identical arguments return equal goals and utterances."""
    first, second = (
        generate(seed, stage, weights),  # type: ignore[arg-type]
        generate(seed, stage, weights),  # type: ignore[arg-type]
    )
    assert first == second
    assert first.seed_utterance == second.seed_utterance
@pytest.mark.property
@pytest.mark.slow
def test_procedural_space_uniqueness_scan() -> None:  # P2 (scaled down — slow)
    """At least 80% of 5,000 stage-3 seed utterances are distinct."""
    weights = {"en": 0.2, "hi": 0.2, "ta": 0.2, "kn": 0.2, "hinglish": 0.2}
    # Scaled-down walk over 5,000 seeds (the 200k scan is gated behind
    # -m slow in CI nightly).
    sample_size = 5_000
    utterances = {
        generate(seed, 3, weights).seed_utterance for seed in range(sample_size)
    }
    # The threshold tolerates a collision rate of up to 20% at this size.
    assert len(utterances) >= 5_000 * 0.8
@pytest.mark.property
def test_language_distribution_chi_square_n10000() -> None:  # P3
    """Language counts over 10,000 seeds pass a chi-square goodness-of-fit test."""
    weights = {"en": 0.3, "hi": 0.3, "ta": 0.2, "kn": 0.1, "hinglish": 0.1}
    n = 10_000
    observed = Counter(generate(seed, 3, weights).language for seed in range(n))

    def _term(lang: str, p: float) -> float:
        # One (O - E)^2 / E contribution to the statistic.
        expected = p * n
        return ((observed.get(lang, 0) - expected) ** 2) / expected

    chi2 = sum(_term(lang, p) for lang, p in weights.items())
    # df=4, alpha=0.001 critical value ≈ 18.47
    assert chi2 < 18.47, f"chi-square {chi2:.2f} rejects null"
@pytest.mark.property
@given(seed=st.integers(min_value=0, max_value=10_000))
@settings(max_examples=100, deadline=None)
def test_stage_template_pool_monotone(seed: int) -> None:  # P4
    """An English-only stage-1 goal carries at most two constraints.

    NOTE(review): despite the name, only the stage-1 bound is asserted here;
    no comparison across stages is made.
    """
    english_only = {"en": 1.0, "hi": 0.0, "ta": 0.0, "kn": 0.0, "hinglish": 0.0}
    stage_one = generate(seed, 1, english_only)
    assert len(stage_one.constraints) <= 2
@pytest.mark.property
@given(
    seed=st.integers(min_value=0, max_value=2**62),
    stage=st.sampled_from([1, 2, 3]),
    weights=_language_weights_strategy(),
)
@settings(max_examples=300, deadline=None)
def test_seed_utterance_always_nfc(
    seed: int, stage: int, weights: dict[str, float]
) -> None:  # P5
    """The seed utterance and every string slot value are NFC-normalized."""
    spec = generate(seed, stage, weights)  # type: ignore[arg-type]
    assert unicodedata.is_normalized("NFC", spec.seed_utterance)
    text_slots = (value for value in spec.slots.values() if isinstance(value, str))
    for text in text_slots:
        assert unicodedata.is_normalized("NFC", text)
@pytest.mark.property
@given(
    seed=st.integers(min_value=0, max_value=10_000),
    stage=st.sampled_from([1, 2, 3]),
)
@settings(max_examples=200, deadline=None)
def test_budget_within_declared_range(seed: int, stage: int) -> None:  # P6
    """A sampled ``budget_inr`` falls inside some template's declared range.

    The check accepts a match against any template in the active library
    that declares a budget; it is not restricted to the goal's own template.
    """
    english_only = {"en": 1.0, "hi": 0.0, "ta": 0.0, "kn": 0.0, "hinglish": 0.0}
    spec = generate(seed, stage, english_only)  # type: ignore[arg-type]
    if "budget_inr" not in spec.constraints:
        return

    budget = spec.constraints["budget_inr"]
    in_some_range = False
    for template in tg._get_library().templates:
        if "budget_inr" not in template.constraints_template:
            continue
        declared = template.constraints_template["budget_inr"]
        assert declared.low is not None and declared.high is not None
        if declared.low <= budget <= declared.high:
            in_some_range = True
            break
    assert in_some_range, (spec.constraints, spec.domain)
# ---------------------------------------------------------------------------
# §3 Integration tests (I1–I5) — use real fixture files written on disk
# ---------------------------------------------------------------------------
def _write_fixture_library(tmp_path: Path) -> None:
    """Write a small ``templates.yaml`` + ``i18n.yaml`` pair into ``tmp_path``.

    The template file holds five templates across four domains (airline,
    cab, restaurant, hotel); the second airline template has ``min_stage`` 3.
    The i18n file holds city and weekday names per language.
    """

    # The helpers below return a NEW object on every call: PyYAML emits
    # anchors/aliases for container objects that appear more than once, which
    # would change the text of the dumped file.
    def uniform(low: int, high: int, step: int) -> dict[str, Any]:
        return {
            "distribution": "uniform",
            "low": low,
            "high": high,
            "step": step,
        }

    def date_slot() -> dict[str, Any]:
        return {"distribution": "date"}

    def time_window() -> dict[str, Any]:
        return {"choices": ["morning", "afternoon", "evening", "late_night"]}

    def dump(filename: str, payload: Any) -> None:
        text = yaml.safe_dump(payload, allow_unicode=True, sort_keys=False)
        (tmp_path / filename).write_text(text, encoding="utf-8")

    airline_basic: dict[str, Any] = {
        "template_id": "airline.book.fixture_v1",
        "domain": "airline",
        "intent": "book_flight",
        "min_stage": 1,
        "required_slots": ["from", "to", "when"],
        "optional_slots": [],
        "slot_distributions": {
            "from": {"choices": ["HYD", "BLR", "DEL", "BOM", "MAA"]},
            "to": {"choices": ["HYD", "BLR", "DEL", "BOM", "MAA"]},
            "when": date_slot(),
        },
        "constraints_template": {
            "budget_inr": uniform(3000, 15000, 500),
            "time_window": time_window(),
        },
        "drift_slot_tags": ["price", "total_fare_inr"],
        "language_variants": {
            "hinglish": [
                "Bhai {when} ko {from} se {to}, {budget_inr} rupees max, {time_window}"
            ],
            "hi": [
                "{when} को {from} से {to}, ₹{budget_inr} से कम, {time_window}"
            ],
            "ta": [
                "{when} அன்று {from} லிருந்து {to}, ₹{budget_inr} கீழ், {time_window}"
            ],
            "kn": [
                "{when} ರಂದು {from} ಇಂದ {to}, ₹{budget_inr} ಒಳಗೆ, {time_window}"
            ],
            "en": [
                "Flight from {from} to {to} on {when}, under ₹{budget_inr}, {time_window}"
            ],
        },
    }

    cab_ride: dict[str, Any] = {
        "template_id": "cab.ride.fixture_v1",
        "domain": "cab",
        "intent": "book_cab",
        "min_stage": 1,
        "required_slots": ["pickup", "drop", "when"],
        "optional_slots": [],
        "slot_distributions": {
            "pickup": {"choices": ["Koramangala", "Indiranagar", "Whitefield"]},
            "drop": {"choices": ["Koramangala", "Indiranagar", "Whitefield"]},
            "when": date_slot(),
        },
        "constraints_template": {
            "budget_inr": uniform(200, 2000, 50),
        },
        "drift_slot_tags": ["fare_inr"],
        "language_variants": {
            "hinglish": ["{when} ko {pickup} se {drop} cab, {budget_inr} ke andar"],
            "hi": ["{when} को {pickup} से {drop}, ₹{budget_inr} के अंदर"],
            "ta": ["{when} அன்று {pickup} லிருந்து {drop}, ₹{budget_inr} கீழ்"],
            "kn": ["{when} ರಂದು {pickup} ಇಂದ {drop}, ₹{budget_inr} ಒಳಗೆ"],
            "en": ["Cab {pickup} to {drop} on {when}, under ₹{budget_inr}"],
        },
    }

    restaurant_order: dict[str, Any] = {
        "template_id": "restaurant.order.fixture_v1",
        "domain": "restaurant",
        "intent": "order_food",
        "min_stage": 1,
        "required_slots": ["city", "cuisine", "when"],
        "optional_slots": [],
        "slot_distributions": {
            "city": {"choices": ["HYD", "BLR", "DEL"]},
            "cuisine": {"choices": ["Biryani", "Dosa", "Pizza"]},
            "when": date_slot(),
        },
        "constraints_template": {
            "budget_inr": uniform(200, 1000, 50),
            "veg_only": {"distribution": "bool"},
        },
        "drift_slot_tags": ["min_order"],
        "language_variants": {
            "hinglish": [
                "{when} ko {city} mein {cuisine}, {budget_inr} max, veg={veg_only}"
            ],
            "hi": [
                "{when} को {city} में {cuisine}, ₹{budget_inr}, veg={veg_only}"
            ],
            "ta": [
                "{when} அன்று {city} இல் {cuisine}, ₹{budget_inr}, veg={veg_only}"
            ],
            "kn": [
                "{when} ರಂದು {city} ನಲ್ಲಿ {cuisine}, ₹{budget_inr}, veg={veg_only}"
            ],
            "en": [
                "Order {cuisine} in {city} on {when}, ₹{budget_inr}, veg={veg_only}"
            ],
        },
    }

    hotel_book: dict[str, Any] = {
        "template_id": "hotel.book.fixture_v1",
        "domain": "hotel",
        "intent": "book_hotel",
        "min_stage": 1,
        "required_slots": ["city", "checkin", "checkout"],
        "optional_slots": [],
        "slot_distributions": {
            "city": {"choices": ["HYD", "BLR", "GOI"]},
            "checkin": date_slot(),
            "checkout": date_slot(),
        },
        "constraints_template": {
            "budget_inr": uniform(2000, 10000, 500),
        },
        "drift_slot_tags": ["cancel_window"],
        "language_variants": {
            "hinglish": ["{city} {checkin}-{checkout}, ₹{budget_inr}/night"],
            "hi": ["{city} {checkin}-{checkout}, ₹{budget_inr} प्रति रात"],
            "ta": ["{city} {checkin}-{checkout}, ₹{budget_inr} இரவுக்கு"],
            "kn": ["{city} {checkin}-{checkout}, ₹{budget_inr} ಒಂದು ರಾತ್ರಿ"],
            "en": ["{city} {checkin} to {checkout}, ₹{budget_inr} per night"],
        },
    }

    # The only template with min_stage 3; it declares three constraints.
    airline_compound: dict[str, Any] = {
        "template_id": "airline.book.compound_v1",
        "domain": "airline",
        "intent": "book_flight",
        "min_stage": 3,
        "required_slots": ["from", "to", "when"],
        "optional_slots": [],
        "slot_distributions": {
            "from": {"choices": ["HYD", "BLR", "DEL"]},
            "to": {"choices": ["HYD", "BLR", "DEL"]},
            "when": date_slot(),
        },
        "constraints_template": {
            "budget_inr": uniform(3000, 15000, 500),
            "time_window": time_window(),
            "passenger_count": uniform(1, 4, 1),
        },
        "drift_slot_tags": ["price", "passenger_count"],
        "language_variants": {
            "hinglish": [
                "{when} ko {from} se {to}, {passenger_count} log, ₹{budget_inr}, {time_window}"
            ],
            "hi": [
                "{when} को {from} से {to}, {passenger_count} लोग, ₹{budget_inr}, {time_window}"
            ],
            "ta": [
                "{when} அன்று {from} லிருந்து {to}, {passenger_count} பேர், ₹{budget_inr}, {time_window}"
            ],
            "kn": [
                "{when} ರಂದು {from} ಇಂದ {to}, {passenger_count} ಜನ, ₹{budget_inr}, {time_window}"
            ],
            "en": [
                "{from} to {to} on {when} for {passenger_count} pax, ₹{budget_inr}, {time_window}"
            ],
        },
    }

    templates: list[dict[str, Any]] = [
        airline_basic,
        cab_ride,
        restaurant_order,
        hotel_book,
        airline_compound,
    ]
    dump("templates.yaml", templates)

    i18n: dict[str, Any] = {
        "hi": {
            "cities": {"BLR": "बेंगलुरु", "MAA": "चेन्नई", "HYD": "हैदराबाद"},
            "weekdays": {"monday": "सोमवार"},
        },
        "ta": {
            "cities": {"BLR": "பெங்களூரு", "MAA": "சென்னை"},
            "weekdays": {"monday": "திங்கட்கிழமை"},
        },
        "kn": {
            "cities": {"BLR": "ಬೆಂಗಳೂರು", "MAA": "ಚೆನ್ನೈ"},
            "weekdays": {"monday": "ಸೋಮವಾರ"},
        },
        "en": {"cities": {"BLR": "Bengaluru"}},
        "hinglish": {"cities": {"BLR": "Bengaluru"}},
    }
    dump("i18n.yaml", i18n)
def _valid_goal_spec(g: GoalSpec) -> None:
    """Assert the structural invariants expected of a generated goal.

    Checks that ``g`` is a dataclass, that its domain and language belong to
    the known sets, and that its seed utterance is NFC, at most 280
    characters, and free of unresolved ``{placeholder}`` tokens.
    """
    known_domains = ("airline", "cab", "restaurant", "hotel")
    known_languages = ("hi", "ta", "kn", "en", "hinglish")
    assert dataclasses.is_dataclass(g)
    assert g.domain in known_domains
    assert g.language in known_languages
    utterance = g.seed_utterance
    assert unicodedata.is_normalized("NFC", utterance)
    assert len(utterance) <= 280
    unresolved = re.search(r"\{[a-z_][a-z0-9_]*\}", utterance)
    assert unresolved is None
@pytest.mark.integration
class TestIntegration:
    """I1–I5: end-to-end checks against fixture files written to disk."""

    @staticmethod
    def _load_fixture(tmp_path: Path) -> TemplateLibrary:
        """Write the fixture files into ``tmp_path`` and load them."""
        _write_fixture_library(tmp_path)
        return load_templates(tmp_path / "templates.yaml")

    def test_load_templates_from_fixture(self, tmp_path: Path) -> None:  # I1
        """The fixture loads as five templates over four domains with all i18n blocks."""
        library = self._load_fixture(tmp_path)
        assert isinstance(library, TemplateLibrary)
        domains = {template.domain for template in library.templates}
        assert len(domains) == 4
        assert len(library.templates) == 5
        for lang in ("hi", "ta", "kn", "en", "hinglish"):
            assert lang in library.i18n

    def test_100_briefs_pass_goal_spec_invariants(self, tmp_path: Path) -> None:  # I2
        """100 stage-3 goals all satisfy ``_valid_goal_spec``."""
        tg.set_library_override(self._load_fixture(tmp_path))
        weights = {"en": 0.3, "hi": 0.3, "ta": 0.2, "kn": 0.1, "hinglish": 0.1}
        for seed in range(100):
            _valid_goal_spec(generate(seed, 3, weights))

    def test_enumerate_variants_stable_order(self, tmp_path: Path) -> None:  # I3
        """Two enumerations with the same arguments yield the same utterance order."""
        tg.set_library_override(self._load_fixture(tmp_path))
        weights = {"en": 0.2, "hi": 0.2, "ta": 0.2, "kn": 0.2, "hinglish": 0.2}

        def _utterances(goals: list[Any]) -> list[str]:
            return [goal.seed_utterance for goal in goals]

        first = list(enumerate_variants(limit=200, stage=3, language_weights=weights))
        second = list(enumerate_variants(limit=200, stage=3, language_weights=weights))
        assert _utterances(first) == _utterances(second)

    @pytest.mark.parametrize(
        "lang,expected_block,forbidden_block",
        [
            ("hi", (0x0900, 0x097F), (0x0B80, 0x0BFF)),
            ("ta", (0x0B80, 0x0BFF), (0x0900, 0x097F)),
            ("kn", (0x0C80, 0x0CFF), (0x0900, 0x097F)),
        ],
    )
    def test_indic_script_isolation(
        self,
        tmp_path: Path,
        lang: str,
        expected_block: tuple[int, int],
        forbidden_block: tuple[int, int],
    ) -> None:  # I4
        """Utterances in an Indic language use its own code-point block only.

        Each utterance must contain at least one code point from
        ``expected_block`` and none from ``forbidden_block``.
        """
        tg.set_library_override(self._load_fixture(tmp_path))
        weights = {
            code: (1.0 if code == lang else 0.0)
            for code in ("hi", "ta", "kn", "en", "hinglish")
        }
        want_lo, want_hi = expected_block
        bad_lo, bad_hi = forbidden_block
        for seed in range(50):
            utterance = generate(seed, 2, weights).seed_utterance
            code_points = [ord(ch) for ch in utterance]
            assert any(want_lo <= cp <= want_hi for cp in code_points), utterance
            assert not any(bad_lo <= cp <= bad_hi for cp in code_points), utterance

    def test_hinglish_never_contains_devanagari(self, tmp_path: Path) -> None:  # I5
        """Hinglish utterances contain no code point in U+0900..U+097F."""
        tg.set_library_override(self._load_fixture(tmp_path))
        weights = {"hinglish": 1.0, "hi": 0.0, "ta": 0.0, "kn": 0.0, "en": 0.0}
        for seed in range(100):
            utterance = generate(seed, 3, weights).seed_utterance
            assert not any(0x0900 <= ord(ch) <= 0x097F for ch in utterance)