"""
Generate offline HTML + render-model snapshots for stress inputs.
Uses fallback segmentation (no model downloads).
"""
import argparse
import json
import sys
from pathlib import Path
# Parent of the directory that contains this script (`parents[1]` of the resolved path).
ROOT = Path(__file__).resolve().parents[1]
# Put ROOT at the front of the import search path *before* the project imports
# below run; presumably `core` and `visualization` live directly under ROOT — confirm.
sys.path.insert(0, str(ROOT))
from core.segmentation import fallback_token_info
from visualization.html_generator import generate_comparison_html
# Directory holding the sample input file that main() reads.
SAMPLES_DIR = ROOT / "tests" / "samples"
def main() -> None:
    """Render the stress sample with placeholder losses and write snapshots.

    Reads ``stress_inputs.txt`` from ``SAMPLES_DIR``, gives every UTF-8 byte of
    that text a constant dummy loss for each of the two compared "models", and
    calls ``generate_comparison_html`` with ``fallback_token_info(text)`` as the
    token-info override and no tokenizers or top-k predictions.

    The returned HTML is written to ``stress.output.html`` and the returned
    render model is dumped as JSON to ``stress.render_model.json``, both inside
    the directory given by ``--out`` (default ``ROOT / "tests" / "golden"``).
    The directory is created if it does not exist. The path of each written
    file is printed to stdout.
    """
    cli = argparse.ArgumentParser(description="Generate offline snapshot HTML/JSON.")
    cli.add_argument(
        "--out",
        type=Path,
        default=ROOT / "tests" / "golden",
        help="Output directory",
    )
    options = cli.parse_args()

    source_text = (SAMPLES_DIR / "stress_inputs.txt").read_text(encoding="utf-8")

    # Loss lists are sized by encoded byte count, not by character count.
    n_bytes = len(source_text.encode("utf-8"))

    render_kwargs = {
        "text": source_text,
        "byte_losses_a": [0.5] * n_bytes,
        "byte_losses_b": [0.6] * n_bytes,
        "model_a_name": "RWKV7 (dummy)",
        "model_b_name": "Qwen3 (dummy)",
        "topk_predictions_a": None,
        "topk_predictions_b": None,
        "tokenizer_a": None,
        "tokenizer_b": None,
        "model_type_a": "rwkv7",
        "model_type_b": "hf",
        "token_info_override": fallback_token_info(source_text),
        "return_render_model": True,
    }
    page, model_snapshot = generate_comparison_html(**render_kwargs)

    # Create the output directory only after rendering has succeeded.
    target_dir = options.out
    target_dir.mkdir(parents=True, exist_ok=True)

    html_file = target_dir / "stress.output.html"
    json_file = target_dir / "stress.render_model.json"

    html_file.write_text(page, encoding="utf-8")
    with json_file.open("w", encoding="utf-8") as handle:
        json.dump(model_snapshot, handle, ensure_ascii=False, indent=2)

    for written in (html_file, json_file):
        print(f"Wrote {written}")
# Script entry point: generate the snapshots only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()