Spaces:
Running
Running
"""
Generate offline HTML and render-model snapshots for the stress inputs.
Uses fallback segmentation, so no models are downloaded.
"""
| import argparse | |
| import json | |
| import sys | |
| from pathlib import Path | |
# Repository root: this script lives in a directory directly beneath it.
ROOT = Path(__file__).resolve().parents[1]

# Put the repository root first on the import path so the project packages
# imported below resolve when this file is executed directly as a script.
sys.path.insert(0, str(ROOT))

from core.segmentation import fallback_token_info  # noqa: E402
from visualization.html_generator import generate_comparison_html  # noqa: E402

# Directory containing the sample input read by main().
SAMPLES_DIR = ROOT.joinpath("tests", "samples")
def main() -> None:
    """Render the stress sample offline and write HTML/JSON snapshots.

    Reads ``stress_inputs.txt`` from ``SAMPLES_DIR``, assigns a constant
    placeholder loss to every UTF-8 byte of the text for each of the two
    dummy models, and calls ``generate_comparison_html`` with the fallback
    token info, no tokenizers and no top-k predictions.

    The returned HTML and render model are written to
    ``stress.output.html`` and ``stress.render_model.json`` inside the
    directory given by ``--out`` (default: ``ROOT / "tests" / "golden"``),
    which is created if it does not already exist.
    """
    cli = argparse.ArgumentParser(description="Generate offline snapshot HTML/JSON.")
    cli.add_argument(
        "--out",
        type=Path,
        default=ROOT / "tests" / "golden",
        help="Output directory",
    )
    options = cli.parse_args()

    sample_text = (SAMPLES_DIR / "stress_inputs.txt").read_text(encoding="utf-8")

    # One loss value per UTF-8 byte (not per character) of the sample text.
    n_bytes = len(sample_text.encode("utf-8"))

    request = {
        "text": sample_text,
        "byte_losses_a": [0.5] * n_bytes,
        "byte_losses_b": [0.6] * n_bytes,
        "model_a_name": "RWKV7 (dummy)",
        "model_b_name": "Qwen3 (dummy)",
        "topk_predictions_a": None,
        "topk_predictions_b": None,
        "tokenizer_a": None,
        "tokenizer_b": None,
        "model_type_a": "rwkv7",
        "model_type_b": "hf",
        # Precomputed fallback segmentation is supplied in place of tokenizers.
        "token_info_override": fallback_token_info(sample_text),
        "return_render_model": True,
    }
    page, model = generate_comparison_html(**request)

    target_dir = options.out
    target_dir.mkdir(parents=True, exist_ok=True)

    html_file = target_dir / "stress.output.html"
    model_file = target_dir / "stress.render_model.json"

    html_file.write_text(page, encoding="utf-8")
    with model_file.open("w", encoding="utf-8") as sink:
        json.dump(model, sink, ensure_ascii=False, indent=2)

    # Report only after both files were written successfully.
    for written in (html_file, model_file):
        print(f"Wrote {written}")


if __name__ == "__main__":
    main()